digest-xxhash 0.2.1 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/README.md +1 -2
- data/Rakefile +13 -14
- data/digest-xxhash.gemspec +21 -4
- data/ext/digest/xxhash/ext.c +2 -0
- data/ext/digest/xxhash/xxhash.h +1341 -709
- data/lib/digest/xxhash/version.rb +1 -1
- data/rakelib/alt-install-task.rake +58 -0
- data/test/test.rb +6 -0
- metadata +22 -3
data/ext/digest/xxhash/xxhash.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* xxHash - Extremely Fast Hash algorithm
|
3
3
|
* Header File
|
4
|
-
* Copyright (C) 2012-
|
4
|
+
* Copyright (C) 2012-2021 Yann Collet
|
5
5
|
*
|
6
6
|
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
7
7
|
*
|
@@ -32,49 +32,142 @@
|
|
32
32
|
* - xxHash homepage: https://www.xxhash.com
|
33
33
|
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
34
34
|
*/
|
35
|
+
|
35
36
|
/*!
|
36
37
|
* @mainpage xxHash
|
37
38
|
*
|
39
|
+
* xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
|
40
|
+
* limits.
|
41
|
+
*
|
42
|
+
* It is proposed in four flavors, in three families:
|
43
|
+
* 1. @ref XXH32_family
|
44
|
+
* - Classic 32-bit hash function. Simple, compact, and runs on almost all
|
45
|
+
* 32-bit and 64-bit systems.
|
46
|
+
* 2. @ref XXH64_family
|
47
|
+
* - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
|
48
|
+
* 64-bit systems (but _not_ 32-bit systems).
|
49
|
+
* 3. @ref XXH3_family
|
50
|
+
* - Modern 64-bit and 128-bit hash function family which features improved
|
51
|
+
* strength and performance across the board, especially on smaller data.
|
52
|
+
* It benefits greatly from SIMD and 64-bit without requiring it.
|
53
|
+
*
|
54
|
+
* Benchmarks
|
55
|
+
* ---
|
56
|
+
* The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
|
57
|
+
* The open source benchmark program is compiled with clang v10.0 using -O3 flag.
|
58
|
+
*
|
59
|
+
* | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
|
60
|
+
* | -------------------- | ------- | ----: | ---------------: | ------------------: |
|
61
|
+
* | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
|
62
|
+
* | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
|
63
|
+
* | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
|
64
|
+
* | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
|
65
|
+
* | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
|
66
|
+
* | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
|
67
|
+
* | RAM sequential read | | N/A | 28.0 GB/s | N/A |
|
68
|
+
* | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
|
69
|
+
* | City64 | | 64 | 22.0 GB/s | 76.6 |
|
70
|
+
* | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
|
71
|
+
* | City128 | | 128 | 21.7 GB/s | 57.7 |
|
72
|
+
* | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
|
73
|
+
* | XXH64() | | 64 | 19.4 GB/s | 71.0 |
|
74
|
+
* | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
|
75
|
+
* | Mum | | 64 | 18.0 GB/s | 67.0 |
|
76
|
+
* | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
|
77
|
+
* | XXH32() | | 32 | 9.7 GB/s | 71.9 |
|
78
|
+
* | City32 | | 32 | 9.1 GB/s | 66.0 |
|
79
|
+
* | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
|
80
|
+
* | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
|
81
|
+
* | SipHash* | | 64 | 3.0 GB/s | 43.2 |
|
82
|
+
* | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
|
83
|
+
* | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
|
84
|
+
* | FNV64 | | 64 | 1.2 GB/s | 62.7 |
|
85
|
+
* | Blake2* | | 256 | 1.1 GB/s | 5.1 |
|
86
|
+
* | SHA1* | | 160 | 0.8 GB/s | 5.6 |
|
87
|
+
* | MD5* | | 128 | 0.6 GB/s | 7.8 |
|
88
|
+
* @note
|
89
|
+
* - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
|
90
|
+
* even though it is mandatory on x64.
|
91
|
+
* - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
|
92
|
+
* by modern standards.
|
93
|
+
* - Small data velocity is a rough average of the algorithm's efficiency for small
|
94
|
+
* data. For more accurate information, see the wiki.
|
95
|
+
* - More benchmarks and strength tests are found on the wiki:
|
96
|
+
* https://github.com/Cyan4973/xxHash/wiki
|
97
|
+
*
|
98
|
+
* Usage
|
99
|
+
* ------
|
100
|
+
* All xxHash variants use a similar API. Changing the algorithm is a trivial
|
101
|
+
* substitution.
|
102
|
+
*
|
103
|
+
* @pre
|
104
|
+
* For functions which take an input and length parameter, the following
|
105
|
+
* requirements are assumed:
|
106
|
+
* - The range from [`input`, `input + length`) is valid, readable memory.
|
107
|
+
* - The only exception: if `length` is `0`, `input` may be `NULL`.
|
108
|
+
* - For C++, the objects must have the *TriviallyCopyable* property, as the
|
109
|
+
* functions access their bytes directly as if they were arrays of `unsigned char`.
|
110
|
+
*
|
111
|
+
* @anchor single_shot_example
|
112
|
+
* **Single Shot**
|
113
|
+
*
|
114
|
+
* These functions are stateless functions which hash a contiguous block of memory,
|
115
|
+
* immediately returning the result. They are the easiest and usually the fastest
|
116
|
+
* option.
|
117
|
+
*
|
118
|
+
* XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
|
119
|
+
*
|
120
|
+
* @code{.c}
|
121
|
+
* #include <string.h>
|
122
|
+
* #include "xxhash.h"
|
123
|
+
*
|
124
|
+
* // Example for a function which hashes a null terminated string with XXH32().
|
125
|
+
* XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
|
126
|
+
* {
|
127
|
+
* // NULL pointers are only valid if the length is zero
|
128
|
+
* size_t length = (string == NULL) ? 0 : strlen(string);
|
129
|
+
* return XXH32(string, length, seed);
|
130
|
+
* }
|
131
|
+
* @endcode
|
132
|
+
*
|
133
|
+
* @anchor streaming_example
|
134
|
+
* **Streaming**
|
135
|
+
*
|
136
|
+
* These groups of functions allow incremental hashing of input of unknown size, even
|
137
|
+
* more than what would fit in a size_t.
|
138
|
+
*
|
139
|
+
* XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
|
140
|
+
*
|
141
|
+
* @code{.c}
|
142
|
+
* #include <stdio.h>
|
143
|
+
* #include <assert.h>
|
144
|
+
* #include "xxhash.h"
|
145
|
+
* // Example for a function which hashes a FILE incrementally with XXH3_64bits().
|
146
|
+
* XXH64_hash_t hashFile(FILE* f)
|
147
|
+
* {
|
148
|
+
* // Allocate a state struct. Do not just use malloc() or new.
|
149
|
+
* XXH3_state_t* state = XXH3_createState();
|
150
|
+
* assert(state != NULL && "Out of memory!");
|
151
|
+
* // Reset the state to start a new hashing session.
|
152
|
+
* XXH3_64bits_reset(state);
|
153
|
+
* char buffer[4096];
|
154
|
+
* size_t count;
|
155
|
+
* // Read the file in chunks
|
156
|
+
* while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
|
157
|
+
* // Run update() as many times as necessary to process the data
|
158
|
+
* XXH3_64bits_update(state, buffer, count);
|
159
|
+
* }
|
160
|
+
* // Retrieve the finalized hash. This will not change the state.
|
161
|
+
* XXH64_hash_t result = XXH3_64bits_digest(state);
|
162
|
+
* // Free the state. Do not use free().
|
163
|
+
* XXH3_freeState(state);
|
164
|
+
* return result;
|
165
|
+
* }
|
166
|
+
* @endcode
|
167
|
+
*
|
38
168
|
* @file xxhash.h
|
39
169
|
* xxHash prototypes and implementation
|
40
170
|
*/
|
41
|
-
/* TODO: update */
|
42
|
-
/* Notice extracted from xxHash homepage:
|
43
|
-
|
44
|
-
xxHash is an extremely fast hash algorithm, running at RAM speed limits.
|
45
|
-
It also successfully passes all tests from the SMHasher suite.
|
46
|
-
|
47
|
-
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
|
48
|
-
|
49
|
-
Name Speed Q.Score Author
|
50
|
-
xxHash 5.4 GB/s 10
|
51
|
-
CrapWow 3.2 GB/s 2 Andrew
|
52
|
-
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
|
53
|
-
SpookyHash 2.0 GB/s 10 Bob Jenkins
|
54
|
-
SBox 1.4 GB/s 9 Bret Mulvey
|
55
|
-
Lookup3 1.2 GB/s 9 Bob Jenkins
|
56
|
-
SuperFastHash 1.2 GB/s 1 Paul Hsieh
|
57
|
-
CityHash64 1.05 GB/s 10 Pike & Alakuijala
|
58
|
-
FNV 0.55 GB/s 5 Fowler, Noll, Vo
|
59
|
-
CRC32 0.43 GB/s 9
|
60
|
-
MD5-32 0.33 GB/s 10 Ronald L. Rivest
|
61
|
-
SHA1-32 0.28 GB/s 10
|
62
|
-
|
63
|
-
Q.Score is a measure of quality of the hash function.
|
64
|
-
It depends on successfully passing SMHasher test set.
|
65
|
-
10 is a perfect score.
|
66
|
-
|
67
|
-
Note: SMHasher's CRC32 implementation is not the fastest one.
|
68
|
-
Other speed-oriented implementations can be faster,
|
69
|
-
especially in combination with PCLMUL instruction:
|
70
|
-
https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
|
71
|
-
|
72
|
-
A 64-bit version, named XXH64, is available since r35.
|
73
|
-
It offers much better speed, but for 64-bit applications only.
|
74
|
-
Name Speed on 64 bits Speed on 32 bits
|
75
|
-
XXH64 13.8 GB/s 1.9 GB/s
|
76
|
-
XXH32 6.8 GB/s 6.0 GB/s
|
77
|
-
*/
|
78
171
|
|
79
172
|
#if defined (__cplusplus)
|
80
173
|
extern "C" {
|
@@ -84,21 +177,53 @@ extern "C" {
|
|
84
177
|
* INLINE mode
|
85
178
|
******************************/
|
86
179
|
/*!
|
87
|
-
*
|
180
|
+
* @defgroup public Public API
|
181
|
+
* Contains details on the public xxHash functions.
|
182
|
+
* @{
|
183
|
+
*/
|
184
|
+
#ifdef XXH_DOXYGEN
|
185
|
+
/*!
|
186
|
+
* @brief Exposes the implementation and marks all functions as `inline`.
|
187
|
+
*
|
88
188
|
* Use these build macros to inline xxhash into the target unit.
|
89
189
|
* Inlining improves performance on small inputs, especially when the length is
|
90
190
|
* expressed as a compile-time constant:
|
91
191
|
*
|
92
|
-
*
|
192
|
+
* https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
|
93
193
|
*
|
94
194
|
* It also keeps xxHash symbols private to the unit, so they are not exported.
|
95
195
|
*
|
96
196
|
* Usage:
|
197
|
+
* @code{.c}
|
97
198
|
* #define XXH_INLINE_ALL
|
98
199
|
* #include "xxhash.h"
|
99
|
-
*
|
200
|
+
* @endcode
|
100
201
|
* Do not compile and link xxhash.o as a separate object, as it is not useful.
|
101
202
|
*/
|
203
|
+
# define XXH_INLINE_ALL
|
204
|
+
# undef XXH_INLINE_ALL
|
205
|
+
/*!
|
206
|
+
* @brief Exposes the implementation without marking functions as inline.
|
207
|
+
*/
|
208
|
+
# define XXH_PRIVATE_API
|
209
|
+
# undef XXH_PRIVATE_API
|
210
|
+
/*!
|
211
|
+
* @brief Emulate a namespace by transparently prefixing all symbols.
|
212
|
+
*
|
213
|
+
* If you want to include _and expose_ xxHash functions from within your own
|
214
|
+
* library, but also want to avoid symbol collisions with other libraries which
|
215
|
+
* may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
|
216
|
+
* any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
|
217
|
+
* (therefore, avoid empty or numeric values).
|
218
|
+
*
|
219
|
+
* Note that no change is required within the calling program as long as it
|
220
|
+
* includes `xxhash.h`: Regular symbol names will be automatically translated
|
221
|
+
* by this header.
|
222
|
+
*/
|
223
|
+
# define XXH_NAMESPACE /* YOUR NAME HERE */
|
224
|
+
# undef XXH_NAMESPACE
|
225
|
+
#endif
|
226
|
+
|
102
227
|
#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
|
103
228
|
&& !defined(XXH_INLINE_ALL_31684351384)
|
104
229
|
/* this section should be traversed only once */
|
@@ -157,6 +282,7 @@ extern "C" {
|
|
157
282
|
# undef XXH3_64bits
|
158
283
|
# undef XXH3_64bits_withSecret
|
159
284
|
# undef XXH3_64bits_withSeed
|
285
|
+
# undef XXH3_64bits_withSecretandSeed
|
160
286
|
# undef XXH3_createState
|
161
287
|
# undef XXH3_freeState
|
162
288
|
# undef XXH3_copyState
|
@@ -174,6 +300,7 @@ extern "C" {
|
|
174
300
|
# undef XXH3_128bits_reset
|
175
301
|
# undef XXH3_128bits_reset_withSeed
|
176
302
|
# undef XXH3_128bits_reset_withSecret
|
303
|
+
# undef XXH3_128bits_reset_withSecretandSeed
|
177
304
|
# undef XXH3_128bits_update
|
178
305
|
# undef XXH3_128bits_digest
|
179
306
|
# undef XXH128_isEqual
|
@@ -211,21 +338,13 @@ extern "C" {
|
|
211
338
|
# undef XXHASH_H_STATIC_13879238742
|
212
339
|
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
|
213
340
|
|
214
|
-
|
215
|
-
|
216
341
|
/* ****************************************************************
|
217
342
|
* Stable API
|
218
343
|
*****************************************************************/
|
219
344
|
#ifndef XXHASH_H_5627135585666179
|
220
345
|
#define XXHASH_H_5627135585666179 1
|
221
346
|
|
222
|
-
|
223
|
-
/*!
|
224
|
-
* @defgroup public Public API
|
225
|
-
* Contains details on the public xxHash functions.
|
226
|
-
* @{
|
227
|
-
*/
|
228
|
-
/* specific declaration modes for Windows */
|
347
|
+
/*! @brief Marks a global symbol. */
|
229
348
|
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
|
230
349
|
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
|
231
350
|
# ifdef XXH_EXPORT
|
@@ -238,24 +357,6 @@ extern "C" {
|
|
238
357
|
# endif
|
239
358
|
#endif
|
240
359
|
|
241
|
-
#ifdef XXH_DOXYGEN
|
242
|
-
/*!
|
243
|
-
* @brief Emulate a namespace by transparently prefixing all symbols.
|
244
|
-
*
|
245
|
-
* If you want to include _and expose_ xxHash functions from within your own
|
246
|
-
* library, but also want to avoid symbol collisions with other libraries which
|
247
|
-
* may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
|
248
|
-
* any public symbol from xxhash library with the value of XXH_NAMESPACE
|
249
|
-
* (therefore, avoid empty or numeric values).
|
250
|
-
*
|
251
|
-
* Note that no change is required within the calling program as long as it
|
252
|
-
* includes `xxhash.h`: Regular symbol names will be automatically translated
|
253
|
-
* by this header.
|
254
|
-
*/
|
255
|
-
# define XXH_NAMESPACE /* YOUR NAME HERE */
|
256
|
-
# undef XXH_NAMESPACE
|
257
|
-
#endif
|
258
|
-
|
259
360
|
#ifdef XXH_NAMESPACE
|
260
361
|
# define XXH_CAT(A,B) A##B
|
261
362
|
# define XXH_NAME2(A,B) XXH_CAT(A,B)
|
@@ -284,23 +385,28 @@ extern "C" {
|
|
284
385
|
# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
|
285
386
|
# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
|
286
387
|
# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
|
388
|
+
# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
|
287
389
|
# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
|
288
390
|
# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
|
289
391
|
# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
|
290
392
|
# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
|
291
393
|
# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
|
292
394
|
# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
|
395
|
+
# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
|
293
396
|
# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
|
294
397
|
# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
|
295
398
|
# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
|
399
|
+
# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
|
296
400
|
/* XXH3_128bits */
|
297
401
|
# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
|
298
402
|
# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
|
299
403
|
# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
|
300
404
|
# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
|
405
|
+
# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
|
301
406
|
# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
|
302
407
|
# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
|
303
408
|
# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
|
409
|
+
# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
|
304
410
|
# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
|
305
411
|
# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
|
306
412
|
# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
|
@@ -310,30 +416,64 @@ extern "C" {
|
|
310
416
|
#endif
|
311
417
|
|
312
418
|
|
419
|
+
/* *************************************
|
420
|
+
* Compiler specifics
|
421
|
+
***************************************/
|
422
|
+
|
423
|
+
/* specific declaration modes for Windows */
|
424
|
+
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
|
425
|
+
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
|
426
|
+
# ifdef XXH_EXPORT
|
427
|
+
# define XXH_PUBLIC_API __declspec(dllexport)
|
428
|
+
# elif XXH_IMPORT
|
429
|
+
# define XXH_PUBLIC_API __declspec(dllimport)
|
430
|
+
# endif
|
431
|
+
# else
|
432
|
+
# define XXH_PUBLIC_API /* do nothing */
|
433
|
+
# endif
|
434
|
+
#endif
|
435
|
+
|
436
|
+
#if defined (__GNUC__)
|
437
|
+
# define XXH_CONSTF __attribute__((const))
|
438
|
+
# define XXH_PUREF __attribute__((pure))
|
439
|
+
# define XXH_MALLOCF __attribute__((malloc))
|
440
|
+
#else
|
441
|
+
# define XXH_CONSTF /* disable */
|
442
|
+
# define XXH_PUREF
|
443
|
+
# define XXH_MALLOCF
|
444
|
+
#endif
|
445
|
+
|
313
446
|
/* *************************************
|
314
447
|
* Version
|
315
448
|
***************************************/
|
316
449
|
#define XXH_VERSION_MAJOR 0
|
317
450
|
#define XXH_VERSION_MINOR 8
|
318
451
|
#define XXH_VERSION_RELEASE 1
|
452
|
+
/*! @brief Version number, encoded as two digits each */
|
319
453
|
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
|
320
454
|
|
321
455
|
/*!
|
322
456
|
* @brief Obtains the xxHash version.
|
323
457
|
*
|
324
|
-
* This is
|
325
|
-
*
|
458
|
+
* This is mostly useful when xxHash is compiled as a shared library,
|
459
|
+
* since the returned value comes from the library, as opposed to the header file.
|
326
460
|
*
|
327
|
-
* @return
|
461
|
+
* @return @ref XXH_VERSION_NUMBER of the invoked library.
|
328
462
|
*/
|
329
|
-
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
|
463
|
+
XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
|
330
464
|
|
331
465
|
|
332
466
|
/* ****************************
|
333
|
-
*
|
467
|
+
* Common basic types
|
334
468
|
******************************/
|
335
469
|
#include <stddef.h> /* size_t */
|
336
|
-
|
470
|
+
/*!
|
471
|
+
* @brief Exit code for the streaming API.
|
472
|
+
*/
|
473
|
+
typedef enum {
|
474
|
+
XXH_OK = 0, /*!< OK */
|
475
|
+
XXH_ERROR /*!< Error */
|
476
|
+
} XXH_errorcode;
|
337
477
|
|
338
478
|
|
339
479
|
/*-**********************************************************************
|
@@ -357,30 +497,27 @@ typedef uint32_t XXH32_hash_t;
|
|
357
497
|
# include <limits.h>
|
358
498
|
# if UINT_MAX == 0xFFFFFFFFUL
|
359
499
|
typedef unsigned int XXH32_hash_t;
|
500
|
+
# elif ULONG_MAX == 0xFFFFFFFFUL
|
501
|
+
typedef unsigned long XXH32_hash_t;
|
360
502
|
# else
|
361
|
-
#
|
362
|
-
typedef unsigned long XXH32_hash_t;
|
363
|
-
# else
|
364
|
-
# error "unsupported platform: need a 32-bit type"
|
365
|
-
# endif
|
503
|
+
# error "unsupported platform: need a 32-bit type"
|
366
504
|
# endif
|
367
505
|
#endif
|
368
506
|
|
369
507
|
/*!
|
370
508
|
* @}
|
371
509
|
*
|
372
|
-
* @defgroup
|
510
|
+
* @defgroup XXH32_family XXH32 family
|
373
511
|
* @ingroup public
|
374
512
|
* Contains functions used in the classic 32-bit xxHash algorithm.
|
375
513
|
*
|
376
514
|
* @note
|
377
|
-
* XXH32 is
|
378
|
-
*
|
379
|
-
* systems, and offers true 64/128 bit hash results.
|
380
|
-
* level of dispersion, and greatly reduces the risks of collisions.
|
515
|
+
* XXH32 is useful for older platforms, with no or poor 64-bit performance.
|
516
|
+
* Note that the @ref XXH3_family provides competitive speed for both 32-bit
|
517
|
+
* and 64-bit systems, and offers true 64/128 bit hash results.
|
381
518
|
*
|
382
|
-
* @see @ref
|
383
|
-
* @see @ref
|
519
|
+
* @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
|
520
|
+
* @see @ref XXH32_impl for implementation details
|
384
521
|
* @{
|
385
522
|
*/
|
386
523
|
|
@@ -389,6 +526,8 @@ typedef uint32_t XXH32_hash_t;
|
|
389
526
|
*
|
390
527
|
* Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
|
391
528
|
*
|
529
|
+
* See @ref single_shot_example "Single Shot Example" for an example.
|
530
|
+
*
|
392
531
|
* @param input The block of data to be hashed, at least @p length bytes in size.
|
393
532
|
* @param length The length of @p input, in bytes.
|
394
533
|
* @param seed The 32-bit seed to alter the hash's output predictably.
|
@@ -406,8 +545,9 @@ typedef uint32_t XXH32_hash_t;
|
|
406
545
|
* @see
|
407
546
|
* XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
|
408
547
|
*/
|
409
|
-
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
|
548
|
+
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
|
410
549
|
|
550
|
+
#ifndef XXH_NO_STREAM
|
411
551
|
/*!
|
412
552
|
* Streaming functions generate the xxHash value from an incremental input.
|
413
553
|
* This method is slower than single-call functions, due to state management.
|
@@ -430,32 +570,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_
|
|
430
570
|
*
|
431
571
|
* When done, release the state using `XXH*_freeState()`.
|
432
572
|
*
|
433
|
-
*
|
434
|
-
* @code{.c}
|
435
|
-
* #include <stdio.h>
|
436
|
-
* #include <xxhash.h>
|
437
|
-
* #define BUFFER_SIZE 256
|
438
|
-
*
|
439
|
-
* // Note: XXH64 and XXH3 use the same interface.
|
440
|
-
* XXH32_hash_t
|
441
|
-
* hashFile(FILE* stream)
|
442
|
-
* {
|
443
|
-
* XXH32_state_t* state;
|
444
|
-
* unsigned char buf[BUFFER_SIZE];
|
445
|
-
* size_t amt;
|
446
|
-
* XXH32_hash_t hash;
|
447
|
-
*
|
448
|
-
* state = XXH32_createState(); // Create a state
|
449
|
-
* assert(state != NULL); // Error check here
|
450
|
-
* XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
|
451
|
-
* while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
|
452
|
-
* XXH32_update(state, buf, amt); // Hash the file in chunks
|
453
|
-
* }
|
454
|
-
* hash = XXH32_digest(state); // Finalize the hash
|
455
|
-
* XXH32_freeState(state); // Clean up
|
456
|
-
* return hash;
|
457
|
-
* }
|
458
|
-
* @endcode
|
573
|
+
* @see @ref streaming_example "Streaming Example" at the top of @ref xxhash.h for an example.
|
459
574
|
*/
|
460
575
|
|
461
576
|
/*!
|
@@ -472,7 +587,7 @@ typedef struct XXH32_state_s XXH32_state_t;
|
|
472
587
|
* Must be freed with XXH32_freeState().
|
473
588
|
* @return An allocated XXH32_state_t on success, `NULL` on failure.
|
474
589
|
*/
|
475
|
-
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
|
590
|
+
XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
|
476
591
|
/*!
|
477
592
|
* @brief Frees an @ref XXH32_state_t.
|
478
593
|
*
|
@@ -540,7 +655,8 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
|
|
540
655
|
*
|
541
656
|
* @return The calculated xxHash32 value from that state.
|
542
657
|
*/
|
543
|
-
XXH_PUBLIC_API XXH32_hash_t
|
658
|
+
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
|
659
|
+
#endif /* !XXH_NO_STREAM */
|
544
660
|
|
545
661
|
/******* Canonical representation *******/
|
546
662
|
|
@@ -591,39 +707,40 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
|
|
591
707
|
*
|
592
708
|
* @return The converted hash.
|
593
709
|
*/
|
594
|
-
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
|
710
|
+
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
|
595
711
|
|
596
712
|
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
*/
|
603
|
-
|
604
|
-
#if defined (__has_c_attribute) && defined (__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) /* C2x */
|
605
|
-
# if __has_c_attribute(fallthrough)
|
606
|
-
# define XXH_FALLTHROUGH [[fallthrough]]
|
607
|
-
# endif
|
713
|
+
#ifdef __has_attribute
|
714
|
+
# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
|
715
|
+
#else
|
716
|
+
# define XXH_HAS_ATTRIBUTE(x) 0
|
717
|
+
#endif
|
608
718
|
|
609
|
-
|
610
|
-
#
|
611
|
-
#
|
612
|
-
#
|
719
|
+
/* C-language Attributes are added in C23. */
|
720
|
+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
|
721
|
+
# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
|
722
|
+
#else
|
723
|
+
# define XXH_HAS_C_ATTRIBUTE(x) 0
|
613
724
|
#endif
|
614
725
|
|
615
|
-
#
|
616
|
-
#
|
617
|
-
#
|
618
|
-
#
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
726
|
+
#if defined(__cplusplus) && defined(__has_cpp_attribute)
|
727
|
+
# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
|
728
|
+
#else
|
729
|
+
# define XXH_HAS_CPP_ATTRIBUTE(x) 0
|
730
|
+
#endif
|
731
|
+
|
732
|
+
/*
|
733
|
+
* Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
|
734
|
+
* introduced in CPP17 and C23.
|
735
|
+
* CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
|
736
|
+
* C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
|
737
|
+
*/
|
738
|
+
#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
|
739
|
+
# define XXH_FALLTHROUGH [[fallthrough]]
|
740
|
+
#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
|
741
|
+
# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
|
742
|
+
#else
|
743
|
+
# define XXH_FALLTHROUGH /* fallthrough */
|
627
744
|
#endif
|
628
745
|
|
629
746
|
/*!
|
@@ -662,18 +779,17 @@ typedef uint64_t XXH64_hash_t;
|
|
662
779
|
/*!
|
663
780
|
* @}
|
664
781
|
*
|
665
|
-
* @defgroup
|
782
|
+
* @defgroup XXH64_family XXH64 family
|
666
783
|
* @ingroup public
|
667
784
|
* @{
|
668
785
|
* Contains functions used in the classic 64-bit xxHash algorithm.
|
669
786
|
*
|
670
787
|
* @note
|
671
788
|
* XXH3 provides competitive speed for both 32-bit and 64-bit systems,
|
672
|
-
* and offers true 64/128 bit hash results.
|
673
|
-
*
|
789
|
+
* and offers true 64/128 bit hash results.
|
790
|
+
* It provides better speed for systems with vector processing capabilities.
|
674
791
|
*/
|
675
792
|
|
676
|
-
|
677
793
|
/*!
|
678
794
|
* @brief Calculates the 64-bit hash of @p input using xxHash64.
|
679
795
|
*
|
@@ -697,32 +813,35 @@ typedef uint64_t XXH64_hash_t;
|
|
697
813
|
* @see
|
698
814
|
* XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
|
699
815
|
*/
|
700
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
|
816
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
|
701
817
|
|
702
818
|
/******* Streaming *******/
|
819
|
+
#ifndef XXH_NO_STREAM
|
703
820
|
/*!
|
704
821
|
* @brief The opaque state struct for the XXH64 streaming API.
|
705
822
|
*
|
706
823
|
* @see XXH64_state_s for details.
|
707
824
|
*/
|
708
825
|
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
|
709
|
-
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
|
826
|
+
XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
|
710
827
|
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
|
711
828
|
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
|
712
829
|
|
713
830
|
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
|
714
831
|
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
|
715
|
-
XXH_PUBLIC_API XXH64_hash_t
|
716
|
-
|
832
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
|
833
|
+
#endif /* !XXH_NO_STREAM */
|
717
834
|
/******* Canonical representation *******/
|
718
835
|
typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
|
719
836
|
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
|
720
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
|
837
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
|
838
|
+
|
839
|
+
#ifndef XXH_NO_XXH3
|
721
840
|
|
722
841
|
/*!
|
723
842
|
* @}
|
724
843
|
* ************************************************************************
|
725
|
-
* @defgroup
|
844
|
+
* @defgroup XXH3_family XXH3 family
|
726
845
|
* @ingroup public
|
727
846
|
* @{
|
728
847
|
*
|
@@ -742,12 +861,14 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
742
861
|
*
|
743
862
|
* XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
|
744
863
|
* but does not require it.
|
745
|
-
*
|
746
|
-
*
|
747
|
-
*
|
864
|
+
* Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
|
865
|
+
* at competitive speeds, even without vector support. Further details are
|
866
|
+
* explained in the implementation.
|
748
867
|
*
|
749
868
|
* Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
|
750
|
-
* ZVector and scalar targets. This can be controlled via the XXH_VECTOR
|
869
|
+
* ZVector and scalar targets. This can be controlled via the @ref XXH_VECTOR
|
870
|
+
* macro. For the x86 family, an automatic dispatcher is included separately
|
871
|
+
* in @ref xxh_x86dispatch.c.
|
751
872
|
*
|
752
873
|
* XXH3 implementation is portable:
|
753
874
|
* it has a generic C90 formulation that can be compiled on any platform,
|
@@ -763,24 +884,42 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
763
884
|
*
|
764
885
|
* The API supports one-shot hashing, streaming mode, and custom secrets.
|
765
886
|
*/
|
766
|
-
|
767
887
|
/*-**********************************************************************
|
768
888
|
* XXH3 64-bit variant
|
769
889
|
************************************************************************/
|
770
890
|
|
771
|
-
|
772
|
-
*
|
773
|
-
*
|
774
|
-
|
891
|
+
/*!
|
892
|
+
* @brief 64-bit unseeded variant of XXH3.
|
893
|
+
*
|
894
|
+
* This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
|
895
|
+
* it may have slightly better performance due to constant propagation of the
|
896
|
+
* defaults.
|
897
|
+
*
|
898
|
+
* @see
|
899
|
+
* XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
|
900
|
+
* @see
|
901
|
+
* XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
|
902
|
+
* @see
|
903
|
+
* XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
|
904
|
+
*/
|
905
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(const void* input, size_t length);
|
775
906
|
|
776
|
-
|
777
|
-
*
|
778
|
-
*
|
779
|
-
*
|
907
|
+
/*!
|
908
|
+
* @brief 64-bit seeded variant of XXH3
|
909
|
+
*
|
910
|
+
* This variant generates a custom secret on the fly based on the default secret
|
911
|
+
* altered using the `seed` value.
|
912
|
+
*
|
780
913
|
* While this operation is decently fast, note that it's not completely free.
|
781
|
-
*
|
914
|
+
*
|
915
|
+
* @note
|
916
|
+
* seed == 0 produces the same results as @ref XXH3_64bits().
|
917
|
+
*
|
918
|
+
* @param input The data to hash
|
919
|
+
* @param length The length of @p input, in bytes.
|
920
|
+
* @param seed The 64-bit seed to alter the state.
|
782
921
|
*/
|
783
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void*
|
922
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(const void* input, size_t length, XXH64_hash_t seed);
|
784
923
|
|
785
924
|
/*!
|
786
925
|
* The bare minimum size for a custom secret.
|
@@ -791,23 +930,29 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
|
|
791
930
|
*/
|
792
931
|
#define XXH3_SECRET_SIZE_MIN 136
|
793
932
|
|
794
|
-
|
795
|
-
*
|
933
|
+
/*!
|
934
|
+
* @brief 64-bit variant of XXH3 with a custom "secret".
|
935
|
+
*
|
796
936
|
* It's possible to provide any blob of bytes as a "secret" to generate the hash.
|
797
937
|
* This makes it more difficult for an external actor to prepare an intentional collision.
|
798
938
|
* The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
|
799
|
-
* However, the quality of
|
800
|
-
*
|
939
|
+
* However, the quality of the secret impacts the dispersion of the hash algorithm.
|
940
|
+
* Therefore, the secret _must_ look like a bunch of random bytes.
|
801
941
|
* Avoid "trivial" or structured data such as repeated sequences or a text document.
|
802
|
-
* Whenever
|
803
|
-
* consider
|
804
|
-
*
|
805
|
-
*
|
942
|
+
* Whenever in doubt about the "randomness" of the blob of bytes,
|
943
|
+
* consider employing "XXH3_generateSecret()" instead (see below).
|
944
|
+
* It will generate a proper high entropy secret derived from the blob of bytes.
|
945
|
+
* Another advantage of using XXH3_generateSecret() is that
|
946
|
+
* it guarantees that all bits within the initial blob of bytes
|
947
|
+
* will impact every bit of the output.
|
948
|
+
* This is not necessarily the case when using the blob of bytes directly
|
949
|
+
* because, when hashing _small_ inputs, only a portion of the secret is employed.
|
806
950
|
*/
|
807
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
|
951
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
|
808
952
|
|
809
953
|
|
810
954
|
/******* Streaming *******/
|
955
|
+
#ifndef XXH_NO_STREAM
|
811
956
|
/*
|
812
957
|
* Streaming requires state maintenance.
|
813
958
|
* This operation costs memory and CPU.
|
@@ -821,7 +966,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
|
|
821
966
|
* @see XXH3_state_s for details.
|
822
967
|
*/
|
823
968
|
typedef struct XXH3_state_s XXH3_state_t;
|
824
|
-
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
|
969
|
+
XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
|
825
970
|
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
|
826
971
|
XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
|
827
972
|
|
@@ -837,7 +982,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
|
|
837
982
|
* digest will be equivalent to `XXH3_64bits_withSeed()`.
|
838
983
|
*/
|
839
984
|
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
|
840
|
-
|
985
|
+
/*!
|
841
986
|
* XXH3_64bits_reset_withSecret():
|
842
987
|
* `secret` is referenced, it _must outlive_ the hash streaming session.
|
843
988
|
* Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
|
@@ -849,7 +994,8 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr,
|
|
849
994
|
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
|
850
995
|
|
851
996
|
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
|
852
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
|
997
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
|
998
|
+
#endif /* !XXH_NO_STREAM */
|
853
999
|
|
854
1000
|
/* note : canonical representation of XXH3 is the same as XXH64
|
855
1001
|
* since they both produce XXH64_hash_t values */
|
@@ -870,11 +1016,31 @@ typedef struct {
|
|
870
1016
|
XXH64_hash_t high64; /*!< `value >> 64` */
|
871
1017
|
} XXH128_hash_t;
|
872
1018
|
|
873
|
-
|
874
|
-
|
875
|
-
|
1019
|
+
/*!
|
1020
|
+
* @brief Unseeded 128-bit variant of XXH3
|
1021
|
+
*
|
1022
|
+
* The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
|
1023
|
+
* for shorter inputs.
|
1024
|
+
*
|
1025
|
+
* This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
|
1026
|
+
* it may have slightly better performance due to constant propagation of the
|
1027
|
+
* defaults.
|
1028
|
+
*
|
1029
|
+
* @see
|
1030
|
+
* XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
|
1031
|
+
* @see
|
1032
|
+
* XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
|
1033
|
+
* @see
|
1034
|
+
* XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
|
1035
|
+
*/
|
1036
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(const void* data, size_t len);
|
1037
|
+
/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
|
1038
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
|
1039
|
+
/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
|
1040
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
|
876
1041
|
|
877
1042
|
/******* Streaming *******/
|
1043
|
+
#ifndef XXH_NO_STREAM
|
878
1044
|
/*
|
879
1045
|
* Streaming requires state maintenance.
|
880
1046
|
* This operation costs memory and CPU.
|
@@ -892,7 +1058,8 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr,
|
|
892
1058
|
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
|
893
1059
|
|
894
1060
|
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
|
895
|
-
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
|
1061
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
|
1062
|
+
#endif /* !XXH_NO_STREAM */
|
896
1063
|
|
897
1064
|
/* Following helper functions make it possible to compare XXH128_hash_t values.
|
898
1065
|
* Since XXH128_hash_t is a structure, this capability is not offered by the language.
|
@@ -902,26 +1069,26 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
|
|
902
1069
|
* XXH128_isEqual():
|
903
1070
|
* Return: 1 if `h1` and `h2` are equal, 0 if they are not.
|
904
1071
|
*/
|
905
|
-
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
|
1072
|
+
XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
|
906
1073
|
|
907
1074
|
/*!
|
908
|
-
*
|
909
|
-
*
|
1075
|
+
* @brief Compares two @ref XXH128_hash_t
|
910
1076
|
* This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
|
911
1077
|
*
|
912
|
-
* return: >0 if *h128_1 > *h128_2
|
913
|
-
*
|
914
|
-
*
|
1078
|
+
* @return: >0 if *h128_1 > *h128_2
|
1079
|
+
* =0 if *h128_1 == *h128_2
|
1080
|
+
* <0 if *h128_1 < *h128_2
|
915
1081
|
*/
|
916
|
-
XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
|
1082
|
+
XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(const void* h128_1, const void* h128_2);
|
917
1083
|
|
918
1084
|
|
919
1085
|
/******* Canonical representation *******/
|
920
1086
|
typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
|
921
1087
|
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
|
922
|
-
XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
|
1088
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
|
923
1089
|
|
924
1090
|
|
1091
|
+
#endif /* !XXH_NO_XXH3 */
|
925
1092
|
#endif /* XXH_NO_LONG_LONG */
|
926
1093
|
|
927
1094
|
/*!
|
@@ -962,13 +1129,10 @@ XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t*
|
|
962
1129
|
struct XXH32_state_s {
|
963
1130
|
XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
|
964
1131
|
XXH32_hash_t large_len; /*!< Whether the total length hashed is >= 16 (handles @ref total_len_32 overflow) */
|
965
|
-
XXH32_hash_t
|
966
|
-
XXH32_hash_t v2; /*!< Second accumulator lane */
|
967
|
-
XXH32_hash_t v3; /*!< Third accumulator lane */
|
968
|
-
XXH32_hash_t v4; /*!< Fourth accumulator lane */
|
1132
|
+
XXH32_hash_t v[4]; /*!< Accumulator lanes */
|
969
1133
|
XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
|
970
1134
|
XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
|
971
|
-
XXH32_hash_t reserved; /*!< Reserved field. Do not read
|
1135
|
+
XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
|
972
1136
|
}; /* typedef'd to XXH32_state_t */
|
973
1137
|
|
974
1138
|
|
@@ -988,16 +1152,15 @@ struct XXH32_state_s {
|
|
988
1152
|
*/
|
989
1153
|
struct XXH64_state_s {
|
990
1154
|
XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
|
991
|
-
XXH64_hash_t
|
992
|
-
XXH64_hash_t v2; /*!< Second accumulator lane */
|
993
|
-
XXH64_hash_t v3; /*!< Third accumulator lane */
|
994
|
-
XXH64_hash_t v4; /*!< Fourth accumulator lane */
|
1155
|
+
XXH64_hash_t v[4]; /*!< Accumulator lanes */
|
995
1156
|
XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
|
996
1157
|
XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
|
997
1158
|
XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/
|
998
|
-
XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it
|
1159
|
+
XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
|
999
1160
|
}; /* typedef'd to XXH64_state_t */
|
1000
1161
|
|
1162
|
+
#ifndef XXH_NO_XXH3
|
1163
|
+
|
1001
1164
|
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
|
1002
1165
|
# include <stdalign.h>
|
1003
1166
|
# define XXH_ALIGN(n) alignas(n)
|
@@ -1063,14 +1226,14 @@ struct XXH64_state_s {
|
|
1063
1226
|
*/
|
1064
1227
|
struct XXH3_state_s {
|
1065
1228
|
XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
|
1066
|
-
/*!< The 8 accumulators.
|
1229
|
+
/*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
|
1067
1230
|
XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
|
1068
1231
|
/*!< Used to store a custom secret generated from a seed. */
|
1069
1232
|
XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
|
1070
1233
|
/*!< The internal buffer. @see XXH32_state_s::mem32 */
|
1071
1234
|
XXH32_hash_t bufferedSize;
|
1072
1235
|
/*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
|
1073
|
-
XXH32_hash_t
|
1236
|
+
XXH32_hash_t useSeed;
|
1074
1237
|
/*!< Reserved field. Needed for padding on 64-bit. */
|
1075
1238
|
size_t nbStripesSoFar;
|
1076
1239
|
/*!< Number of stripes processed. */
|
@@ -1106,45 +1269,156 @@ struct XXH3_state_s {
|
|
1106
1269
|
#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }
|
1107
1270
|
|
1108
1271
|
|
1272
|
+
/*!
|
1273
|
+
* simple alias to pre-selected XXH3_128bits variant
|
1274
|
+
*/
|
1275
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
|
1276
|
+
|
1277
|
+
|
1109
1278
|
/* === Experimental API === */
|
1110
1279
|
/* Symbols defined below must be considered tied to a specific library version. */
|
1111
1280
|
|
1112
|
-
|
1281
|
+
/*!
|
1113
1282
|
* XXH3_generateSecret():
|
1114
1283
|
*
|
1115
1284
|
* Derive a high-entropy secret from any user-defined content, named customSeed.
|
1116
1285
|
* The generated secret can be used in combination with `*_withSecret()` functions.
|
1117
|
-
* The `_withSecret()` variants are useful to provide a higher level of protection
|
1118
|
-
* as it becomes much more difficult for an external actor to
|
1286
|
+
* The `_withSecret()` variants are useful to provide a higher level of protection
|
1287
|
+
* than 64-bit seed, as it becomes much more difficult for an external actor to
|
1288
|
+
* guess how to impact the calculation logic.
|
1119
1289
|
*
|
1120
1290
|
* The function accepts as input a custom seed of any length and any content,
|
1121
|
-
* and derives from it a high-entropy secret of length
|
1122
|
-
*
|
1123
|
-
* The generated secret is _always_ XXH_SECRET_DEFAULT_SIZE bytes long.
|
1291
|
+
* and derives from it a high-entropy secret of length @p secretSize into an
|
1292
|
+
* already allocated buffer @p secretBuffer.
|
1124
1293
|
*
|
1125
1294
|
* The generated secret can then be used with any `*_withSecret()` variant.
|
1126
|
-
*
|
1127
|
-
*
|
1295
|
+
* The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
|
1296
|
+
* @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
|
1128
1297
|
* are part of this list. They all accept a `secret` parameter
|
1129
|
-
* which must be
|
1298
|
+
* which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
|
1130
1299
|
* _and_ feature very high entropy (consist of random-looking bytes).
|
1131
|
-
* These conditions can be a high bar to meet, so
|
1132
|
-
*
|
1300
|
+
* These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
|
1301
|
+
* be employed to ensure proper quality.
|
1133
1302
|
*
|
1134
|
-
* customSeed can be anything. It can have any size, even small ones,
|
1135
|
-
* and its content can be anything, even
|
1136
|
-
* The resulting `secret` will nonetheless provide all
|
1303
|
+
* @p customSeed can be anything. It can have any size, even small ones,
|
1304
|
+
* and its content can be anything, even "poor entropy" sources such as a bunch
|
1305
|
+
* of zeroes. The resulting `secret` will nonetheless provide all required qualities.
|
1306
|
+
*
|
1307
|
+
* @pre
|
1308
|
+
* - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
|
1309
|
+
* - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
|
1137
1310
|
*
|
1138
|
-
*
|
1139
|
-
*
|
1311
|
+
* Example code:
|
1312
|
+
* @code{.c}
|
1313
|
+
* #include <stdio.h>
|
1314
|
+
* #include <stdlib.h>
|
1315
|
+
* #include <string.h>
|
1316
|
+
* #define XXH_STATIC_LINKING_ONLY // expose unstable API
|
1317
|
+
* #include "xxhash.h"
|
1318
|
+
* // Hashes argv[2] using the entropy from argv[1].
|
1319
|
+
* int main(int argc, char* argv[])
|
1320
|
+
* {
|
1321
|
+
* char secret[XXH3_SECRET_SIZE_MIN];
|
1322
|
+
* if (argc != 3) { return 1; }
|
1323
|
+
* XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
|
1324
|
+
* XXH64_hash_t h = XXH3_64bits_withSecret(
|
1325
|
+
* argv[2], strlen(argv[2]),
|
1326
|
+
* secret, sizeof(secret)
|
1327
|
+
* );
|
1328
|
+
* printf("%016llx\n", (unsigned long long) h);
|
1329
|
+
* }
|
1330
|
+
* @endcode
|
1140
1331
|
*/
|
1141
|
-
XXH_PUBLIC_API
|
1142
|
-
|
1332
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
|
1143
1333
|
|
1144
|
-
|
1145
|
-
|
1334
|
+
/*!
|
1335
|
+
* @brief Generate the same secret as the _withSeed() variants.
|
1336
|
+
*
|
1337
|
+
* The generated secret can be used in combination with
|
1338
|
+
* `*_withSecret()` and `_withSecretandSeed()` variants.
|
1339
|
+
*
|
1340
|
+
* Example C++ `std::string` hash class:
|
1341
|
+
* @code{.cpp}
|
1342
|
+
* #include <string>
|
1343
|
+
* #define XXH_STATIC_LINKING_ONLY // expose unstable API
|
1344
|
+
* #include "xxhash.h"
|
1345
|
+
* // Slow, seeds each time
|
1346
|
+
* class HashSlow {
|
1347
|
+
* XXH64_hash_t seed;
|
1348
|
+
* public:
|
1349
|
+
* HashSlow(XXH64_hash_t s) : seed{s} {}
|
1350
|
+
* size_t operator()(const std::string& x) const {
|
1351
|
+
* return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
|
1352
|
+
* }
|
1353
|
+
* };
|
1354
|
+
* // Fast, caches the seeded secret for future uses.
|
1355
|
+
* class HashFast {
|
1356
|
+
* unsigned char secret[XXH3_SECRET_SIZE_MIN];
|
1357
|
+
* public:
|
1358
|
+
* HashFast(XXH64_hash_t s) {
|
1359
|
+
* XXH3_generateSecret_fromSeed(secret, s);
|
1360
|
+
* }
|
1361
|
+
* size_t operator()(const std::string& x) const {
|
1362
|
+
* return size_t{
|
1363
|
+
* XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
|
1364
|
+
* };
|
1365
|
+
* }
|
1366
|
+
* };
|
1367
|
+
* @endcode
|
1368
|
+
* @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
|
1369
|
+
* @param seed The seed to seed the state.
|
1370
|
+
*/
|
1371
|
+
XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
|
1146
1372
|
|
1373
|
+
/*!
|
1374
|
+
* These variants generate hash values using either
|
1375
|
+
* @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
|
1376
|
+
* or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
|
1377
|
+
*
|
1378
|
+
* This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
|
1379
|
+
* `_withSeed()` has to generate the secret on the fly for "large" keys.
|
1380
|
+
* It's fast, but can be perceptible for "not so large" keys (< 1 KB).
|
1381
|
+
* `_withSecret()` has to generate the masks on the fly for "small" keys,
|
1382
|
+
* which requires more instructions than _withSeed() variants.
|
1383
|
+
* Therefore, _withSecretandSeed variant combines the best of both worlds.
|
1384
|
+
*
|
1385
|
+
* When @p secret has been generated by XXH3_generateSecret_fromSeed(),
|
1386
|
+
* this variant produces *exactly* the same results as `_withSeed()` variant,
|
1387
|
+
* hence offering only a pure speed benefit on "large" input,
|
1388
|
+
* by skipping the need to regenerate the secret for every large input.
|
1389
|
+
*
|
1390
|
+
* Another usage scenario is to hash the secret to a 64-bit hash value,
|
1391
|
+
* for example with XXH3_64bits(), which then becomes the seed,
|
1392
|
+
* and then employ both the seed and the secret in _withSecretandSeed().
|
1393
|
+
* On top of speed, an added benefit is that each bit in the secret
|
1394
|
+
* has a 50% chance to flip each bit in the output, via its impact on the seed.
|
1395
|
+
*
|
1396
|
+
* This is not guaranteed when using the secret directly in "small data" scenarios,
|
1397
|
+
* because only portions of the secret are employed for small data.
|
1398
|
+
*/
|
1399
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
|
1400
|
+
XXH3_64bits_withSecretandSeed(const void* data, size_t len,
|
1401
|
+
const void* secret, size_t secretSize,
|
1402
|
+
XXH64_hash_t seed);
|
1403
|
+
/*! @copydoc XXH3_64bits_withSecretandSeed() */
|
1404
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
|
1405
|
+
XXH3_128bits_withSecretandSeed(const void* input, size_t length,
|
1406
|
+
const void* secret, size_t secretSize,
|
1407
|
+
XXH64_hash_t seed64);
|
1408
|
+
#ifndef XXH_NO_STREAM
|
1409
|
+
/*! @copydoc XXH3_64bits_withSecretandSeed() */
|
1410
|
+
XXH_PUBLIC_API XXH_errorcode
|
1411
|
+
XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
|
1412
|
+
const void* secret, size_t secretSize,
|
1413
|
+
XXH64_hash_t seed64);
|
1414
|
+
/*! @copydoc XXH3_64bits_withSecretandSeed() */
|
1415
|
+
XXH_PUBLIC_API XXH_errorcode
|
1416
|
+
XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
|
1417
|
+
const void* secret, size_t secretSize,
|
1418
|
+
XXH64_hash_t seed64);
|
1419
|
+
#endif /* !XXH_NO_STREAM */
|
1147
1420
|
|
1421
|
+
#endif /* !XXH_NO_XXH3 */
|
1148
1422
|
#endif /* XXH_NO_LONG_LONG */
|
1149
1423
|
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
|
1150
1424
|
# define XXH_IMPLEMENTATION
|
@@ -1198,7 +1472,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1198
1472
|
/*!
|
1199
1473
|
* @brief Define this to disable 64-bit code.
|
1200
1474
|
*
|
1201
|
-
* Useful if only using the @ref
|
1475
|
+
* Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
|
1202
1476
|
*/
|
1203
1477
|
# define XXH_NO_LONG_LONG
|
1204
1478
|
# undef XXH_NO_LONG_LONG /* don't actually */
|
@@ -1221,7 +1495,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1221
1495
|
* Use `memcpy()`. Safe and portable. Note that most modern compilers will
|
1222
1496
|
* eliminate the function call and treat it as an unaligned access.
|
1223
1497
|
*
|
1224
|
-
* - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((
|
1498
|
+
* - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
|
1225
1499
|
* @par
|
1226
1500
|
* Depends on compiler extensions and is therefore not portable.
|
1227
1501
|
* This method is safe _if_ your compiler supports it,
|
@@ -1248,22 +1522,40 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1248
1522
|
* care, as what works on one compiler/platform/optimization level may cause
|
1249
1523
|
* another to read garbage data or even crash.
|
1250
1524
|
*
|
1251
|
-
* See
|
1525
|
+
* See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
|
1252
1526
|
*
|
1253
1527
|
* Prefer these methods in priority order (0 > 3 > 1 > 2)
|
1254
1528
|
*/
|
1255
1529
|
# define XXH_FORCE_MEMORY_ACCESS 0
|
1530
|
+
|
1256
1531
|
/*!
|
1257
|
-
* @def
|
1258
|
-
* @brief
|
1532
|
+
* @def XXH_SIZE_OPT
|
1533
|
+
* @brief Controls how much xxHash optimizes for size.
|
1259
1534
|
*
|
1260
|
-
*
|
1261
|
-
*
|
1535
|
+
* xxHash, when compiled, tends to result in a rather large binary size. This
|
1536
|
+
* is mostly due to heavy usage of forced inlining and constant folding of the
|
1537
|
+
* @ref XXH3_family to increase performance.
|
1262
1538
|
*
|
1263
|
-
*
|
1264
|
-
*
|
1539
|
+
* However, some developers prefer size over speed. This option can
|
1540
|
+
* significantly reduce the size of the generated code. When using the `-Os`
|
1541
|
+
* or `-Oz` options on GCC or Clang, this is defined to 1 by default,
|
1542
|
+
* otherwise it is defined to 0.
|
1543
|
+
*
|
1544
|
+
* Most of these size optimizations can be controlled manually.
|
1545
|
+
*
|
1546
|
+
* This is a number from 0-2.
|
1547
|
+
* - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
|
1548
|
+
* comes first.
|
1549
|
+
* - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
|
1550
|
+
* conservative and disables hacks that increase code size. It implies the
|
1551
|
+
* options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
|
1552
|
+
* and @ref XXH3_NEON_LANES == 8 if they are not already defined.
|
1553
|
+
* - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
|
1554
|
+
* Performance may cry. For example, the single shot functions just use the
|
1555
|
+
* streaming API.
|
1265
1556
|
*/
|
1266
|
-
# define
|
1557
|
+
# define XXH_SIZE_OPT 0
|
1558
|
+
|
1267
1559
|
/*!
|
1268
1560
|
* @def XXH_FORCE_ALIGN_CHECK
|
1269
1561
|
* @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
|
@@ -1285,9 +1577,11 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1285
1577
|
*
|
1286
1578
|
* In these cases, the alignment check can be removed by setting this macro to 0.
|
1287
1579
|
* Then the code will always use unaligned memory access.
|
1288
|
-
* Align check is automatically disabled on x86, x64
|
1580
|
+
* Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
|
1289
1581
|
* which are platforms known to offer good unaligned memory accesses performance.
|
1290
1582
|
*
|
1583
|
+
* It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
|
1584
|
+
*
|
1291
1585
|
* This option does not affect XXH3 (only XXH32 and XXH64).
|
1292
1586
|
*/
|
1293
1587
|
# define XXH_FORCE_ALIGN_CHECK 0
|
@@ -1309,24 +1603,22 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1309
1603
|
* XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
|
1310
1604
|
* compiler full control on whether to inline or not.
|
1311
1605
|
*
|
1312
|
-
* When not optimizing (-O0),
|
1313
|
-
*
|
1606
|
+
* When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
|
1607
|
+
* @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
|
1314
1608
|
*/
|
1315
1609
|
# define XXH_NO_INLINE_HINTS 0
|
1316
1610
|
|
1317
1611
|
/*!
|
1318
|
-
* @def
|
1319
|
-
* @brief Whether to
|
1612
|
+
* @def XXH32_ENDJMP
|
1613
|
+
* @brief Whether to use a jump for `XXH32_finalize`.
|
1320
1614
|
*
|
1321
|
-
* For performance, `XXH32_finalize` uses
|
1322
|
-
*
|
1615
|
+
* For performance, `XXH32_finalize` uses multiple branches in the finalizer.
|
1616
|
+
* This is generally preferable for performance,
|
1617
|
+
* but depending on exact architecture, a jmp may be preferable.
|
1323
1618
|
*
|
1324
|
-
* This is
|
1325
|
-
* and depending on the architecture, may even be slower
|
1326
|
-
*
|
1327
|
-
* This is automatically defined with `-Os`/`-Oz` on GCC and Clang.
|
1619
|
+
* This setting is only likely to make a difference for very small inputs.
|
1328
1620
|
*/
|
1329
|
-
# define
|
1621
|
+
# define XXH32_ENDJMP 0
|
1330
1622
|
|
1331
1623
|
/*!
|
1332
1624
|
* @internal
|
@@ -1337,38 +1629,45 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1337
1629
|
*/
|
1338
1630
|
# define XXH_OLD_NAMES
|
1339
1631
|
# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
|
1632
|
+
|
1633
|
+
/*!
|
1634
|
+
* @def XXH_NO_STREAM
|
1635
|
+
* @brief Disables the streaming API.
|
1636
|
+
*
|
1637
|
+
* When xxHash is not inlined and the streaming functions are not used, disabling
|
1638
|
+
* the streaming functions can improve code size significantly, especially with
|
1639
|
+
* the @ref XXH3_family which tends to make constant folded copies of itself.
|
1640
|
+
*/
|
1641
|
+
# define XXH_NO_STREAM
|
1642
|
+
# undef XXH_NO_STREAM /* don't actually */
|
1340
1643
|
#endif /* XXH_DOXYGEN */
|
1341
1644
|
/*!
|
1342
1645
|
* @}
|
1343
1646
|
*/
|
1344
1647
|
|
1345
1648
|
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
1346
|
-
/* prefer __packed__ structures (method 1) for
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1350
|
-
( \
|
1351
|
-
defined(__GNUC__) && ( \
|
1352
|
-
(defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
|
1353
|
-
( \
|
1354
|
-
defined(__mips__) && \
|
1355
|
-
(__mips <= 5 || __mips_isa_rev < 6) && \
|
1356
|
-
(!defined(__mips16) || defined(__mips_mips16e2)) \
|
1357
|
-
) \
|
1358
|
-
) \
|
1359
|
-
) \
|
1360
|
-
)
|
1649
|
+
/* prefer __packed__ structures (method 1) for GCC
|
1650
|
+
* < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
|
1651
|
+
* which for some reason does unaligned loads. */
|
1652
|
+
# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
|
1361
1653
|
# define XXH_FORCE_MEMORY_ACCESS 1
|
1362
1654
|
# endif
|
1363
1655
|
#endif
|
1364
1656
|
|
1365
|
-
#ifndef
|
1366
|
-
|
1657
|
+
#ifndef XXH_SIZE_OPT
|
1658
|
+
/* default to 1 for -Os or -Oz */
|
1659
|
+
# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
|
1660
|
+
# define XXH_SIZE_OPT 1
|
1661
|
+
# else
|
1662
|
+
# define XXH_SIZE_OPT 0
|
1663
|
+
# endif
|
1367
1664
|
#endif
|
1368
1665
|
|
1369
1666
|
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
|
1370
|
-
|
1371
|
-
|
1667
|
+
/* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
|
1668
|
+
# if XXH_SIZE_OPT >= 1 || \
|
1669
|
+
defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
|
1670
|
+
|| defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
|
1372
1671
|
# define XXH_FORCE_ALIGN_CHECK 0
|
1373
1672
|
# else
|
1374
1673
|
# define XXH_FORCE_ALIGN_CHECK 1
|
@@ -1376,22 +1675,16 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1376
1675
|
#endif
|
1377
1676
|
|
1378
1677
|
#ifndef XXH_NO_INLINE_HINTS
|
1379
|
-
# if defined(
|
1380
|
-
|| defined(__NO_INLINE__) /* -O0, -fno-inline */
|
1678
|
+
# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
|
1381
1679
|
# define XXH_NO_INLINE_HINTS 1
|
1382
1680
|
# else
|
1383
1681
|
# define XXH_NO_INLINE_HINTS 0
|
1384
1682
|
# endif
|
1385
1683
|
#endif
|
1386
1684
|
|
1387
|
-
#ifndef
|
1388
|
-
|
1389
|
-
|
1390
|
-
/* The if/then loop is preferable to switch/case on gcc (on x64) */
|
1391
|
-
# define XXH_REROLL 1
|
1392
|
-
# else
|
1393
|
-
# define XXH_REROLL 0
|
1394
|
-
# endif
|
1685
|
+
#ifndef XXH32_ENDJMP
|
1686
|
+
/* generally preferable for performance */
|
1687
|
+
# define XXH32_ENDJMP 0
|
1395
1688
|
#endif
|
1396
1689
|
|
1397
1690
|
/*!
|
@@ -1403,6 +1696,24 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1403
1696
|
/* *************************************
|
1404
1697
|
* Includes & Memory related functions
|
1405
1698
|
***************************************/
|
1699
|
+
#if defined(XXH_NO_STREAM)
|
1700
|
+
/* nothing */
|
1701
|
+
#elif defined(XXH_NO_STDLIB)
|
1702
|
+
|
1703
|
+
/* When requesting to disable any mention of stdlib,
|
1704
|
+
* the library loses the ability to invoke malloc / free.
|
1705
|
+
* In practice, it means that functions like `XXH*_createState()`
|
1706
|
+
* will always fail, and return NULL.
|
1707
|
+
* This flag is useful in situations where
|
1708
|
+
* xxhash.h is integrated into some kernel, embedded or limited environment
|
1709
|
+
* without access to dynamic allocation.
|
1710
|
+
*/
|
1711
|
+
|
1712
|
+
static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
|
1713
|
+
static void XXH_free(void* p) { (void)p; }
|
1714
|
+
|
1715
|
+
#else
|
1716
|
+
|
1406
1717
|
/*
|
1407
1718
|
* Modify the local functions below should you wish to use
|
1408
1719
|
* different memory routines for malloc() and free()
|
@@ -1413,7 +1724,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
1413
1724
|
* @internal
|
1414
1725
|
* @brief Modify this function to use a different routine than malloc().
|
1415
1726
|
*/
|
1416
|
-
static void* XXH_malloc(size_t s) { return malloc(s); }
|
1727
|
+
static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
|
1417
1728
|
|
1418
1729
|
/*!
|
1419
1730
|
* @internal
|
@@ -1421,6 +1732,8 @@ static void* XXH_malloc(size_t s) { return malloc(s); }
|
|
1421
1732
|
*/
|
1422
1733
|
static void XXH_free(void* p) { free(p); }
|
1423
1734
|
|
1735
|
+
#endif /* XXH_NO_STDLIB */
|
1736
|
+
|
1424
1737
|
#include <string.h>
|
1425
1738
|
|
1426
1739
|
/*!
|
@@ -1443,19 +1756,19 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
|
1443
1756
|
#endif
|
1444
1757
|
|
1445
1758
|
#if XXH_NO_INLINE_HINTS /* disable inlining hints */
|
1446
|
-
# if defined(__GNUC__)
|
1759
|
+
# if defined(__GNUC__) || defined(__clang__)
|
1447
1760
|
# define XXH_FORCE_INLINE static __attribute__((unused))
|
1448
1761
|
# else
|
1449
1762
|
# define XXH_FORCE_INLINE static
|
1450
1763
|
# endif
|
1451
1764
|
# define XXH_NO_INLINE static
|
1452
1765
|
/* enable inlining hints */
|
1766
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
1767
|
+
# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
|
1768
|
+
# define XXH_NO_INLINE static __attribute__((noinline))
|
1453
1769
|
#elif defined(_MSC_VER) /* Visual Studio */
|
1454
1770
|
# define XXH_FORCE_INLINE static __forceinline
|
1455
1771
|
# define XXH_NO_INLINE static __declspec(noinline)
|
1456
|
-
#elif defined(__GNUC__)
|
1457
|
-
# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
|
1458
|
-
# define XXH_NO_INLINE static __attribute__((noinline))
|
1459
1772
|
#elif defined (__cplusplus) \
|
1460
1773
|
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
|
1461
1774
|
# define XXH_FORCE_INLINE static inline
|
@@ -1496,8 +1809,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
|
1496
1809
|
/* note: use after variable declarations */
|
1497
1810
|
#ifndef XXH_STATIC_ASSERT
|
1498
1811
|
# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
|
1499
|
-
#
|
1500
|
-
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
|
1812
|
+
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
|
1501
1813
|
# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
|
1502
1814
|
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
|
1503
1815
|
# else
|
@@ -1522,7 +1834,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
|
1522
1834
|
* We also use it to prevent unwanted constant folding for AArch64 in
|
1523
1835
|
* XXH3_initCustomSecret_scalar().
|
1524
1836
|
*/
|
1525
|
-
#
|
1837
|
+
#if defined(__GNUC__) || defined(__clang__)
|
1526
1838
|
# define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
|
1527
1839
|
#else
|
1528
1840
|
# define XXH_COMPILER_GUARD(var) ((void)0)
|
@@ -1615,30 +1927,31 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
|
|
1615
1927
|
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
|
1616
1928
|
|
1617
1929
|
/*
|
1618
|
-
*
|
1619
|
-
*
|
1620
|
-
*
|
1621
|
-
*
|
1930
|
+
* __attribute__((aligned(1))) is supported by gcc and clang. Originally the
|
1931
|
+
* documentation claimed that it only increased the alignment, but actually it
|
1932
|
+
* can decrease it on gcc, clang, and icc:
|
1933
|
+
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
|
1934
|
+
* https://gcc.godbolt.org/z/xYez1j67Y.
|
1622
1935
|
*/
|
1623
1936
|
#ifdef XXH_OLD_NAMES
|
1624
1937
|
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
|
1625
1938
|
#endif
|
1626
1939
|
static xxh_u32 XXH_read32(const void* ptr)
|
1627
1940
|
{
|
1628
|
-
typedef
|
1629
|
-
return ((const
|
1941
|
+
typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
|
1942
|
+
return *((const xxh_unalign32*)ptr);
|
1630
1943
|
}
|
1631
1944
|
|
1632
1945
|
#else
|
1633
1946
|
|
1634
1947
|
/*
|
1635
1948
|
* Portable and safe solution. Generally efficient.
|
1636
|
-
* see:
|
1949
|
+
* see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
|
1637
1950
|
*/
|
1638
1951
|
static xxh_u32 XXH_read32(const void* memPtr)
|
1639
1952
|
{
|
1640
1953
|
xxh_u32 val;
|
1641
|
-
|
1954
|
+
XXH_memcpy(&val, memPtr, sizeof(val));
|
1642
1955
|
return val;
|
1643
1956
|
}
|
1644
1957
|
|
@@ -1831,8 +2144,10 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
|
|
1831
2144
|
*********************************************************************/
|
1832
2145
|
/*!
|
1833
2146
|
* @}
|
1834
|
-
* @defgroup
|
2147
|
+
* @defgroup XXH32_impl XXH32 implementation
|
1835
2148
|
* @ingroup impl
|
2149
|
+
*
|
2150
|
+
* Details on the XXH32 implementation.
|
1836
2151
|
* @{
|
1837
2152
|
*/
|
1838
2153
|
/* #define instead of static const, to be used as initializers */
|
@@ -1912,17 +2227,17 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
|
|
1912
2227
|
* The final mix ensures that all input bits have a chance to impact any bit in
|
1913
2228
|
* the output digest, resulting in an unbiased distribution.
|
1914
2229
|
*
|
1915
|
-
* @param
|
2230
|
+
* @param hash The hash to avalanche.
|
1916
2231
|
* @return The avalanched hash.
|
1917
2232
|
*/
|
1918
|
-
static xxh_u32 XXH32_avalanche(xxh_u32
|
2233
|
+
static xxh_u32 XXH32_avalanche(xxh_u32 hash)
|
1919
2234
|
{
|
1920
|
-
|
1921
|
-
|
1922
|
-
|
1923
|
-
|
1924
|
-
|
1925
|
-
return
|
2235
|
+
hash ^= hash >> 15;
|
2236
|
+
hash *= XXH_PRIME32_2;
|
2237
|
+
hash ^= hash >> 13;
|
2238
|
+
hash *= XXH_PRIME32_3;
|
2239
|
+
hash ^= hash >> 16;
|
2240
|
+
return hash;
|
1926
2241
|
}
|
1927
2242
|
|
1928
2243
|
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
|
@@ -1935,28 +2250,31 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32)
|
|
1935
2250
|
* This final stage will digest them to ensure that all input bytes are present
|
1936
2251
|
* in the final mix.
|
1937
2252
|
*
|
1938
|
-
* @param
|
2253
|
+
* @param hash The hash to finalize.
|
1939
2254
|
* @param ptr The pointer to the remaining input.
|
1940
2255
|
* @param len The remaining length, modulo 16.
|
1941
2256
|
* @param align Whether @p ptr is aligned.
|
1942
2257
|
* @return The finalized hash.
|
2258
|
+
* @see XXH64_finalize().
|
1943
2259
|
*/
|
1944
|
-
static xxh_u32
|
1945
|
-
XXH32_finalize(xxh_u32
|
2260
|
+
static XXH_PUREF xxh_u32
|
2261
|
+
XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
1946
2262
|
{
|
1947
|
-
#define XXH_PROCESS1 do {
|
1948
|
-
|
1949
|
-
|
2263
|
+
#define XXH_PROCESS1 do { \
|
2264
|
+
hash += (*ptr++) * XXH_PRIME32_5; \
|
2265
|
+
hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
|
1950
2266
|
} while (0)
|
1951
2267
|
|
1952
|
-
#define XXH_PROCESS4 do {
|
1953
|
-
|
1954
|
-
ptr += 4;
|
1955
|
-
|
2268
|
+
#define XXH_PROCESS4 do { \
|
2269
|
+
hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
|
2270
|
+
ptr += 4; \
|
2271
|
+
hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
|
1956
2272
|
} while (0)
|
1957
2273
|
|
1958
|
-
|
1959
|
-
|
2274
|
+
if (ptr==NULL) XXH_ASSERT(len == 0);
|
2275
|
+
|
2276
|
+
/* Compact rerolled version; generally faster */
|
2277
|
+
if (!XXH32_ENDJMP) {
|
1960
2278
|
len &= 15;
|
1961
2279
|
while (len >= 4) {
|
1962
2280
|
XXH_PROCESS4;
|
@@ -1966,7 +2284,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
1966
2284
|
XXH_PROCESS1;
|
1967
2285
|
--len;
|
1968
2286
|
}
|
1969
|
-
return XXH32_avalanche(
|
2287
|
+
return XXH32_avalanche(hash);
|
1970
2288
|
} else {
|
1971
2289
|
switch(len&15) /* or switch(bEnd - p) */ {
|
1972
2290
|
case 12: XXH_PROCESS4;
|
@@ -1974,7 +2292,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
1974
2292
|
case 8: XXH_PROCESS4;
|
1975
2293
|
XXH_FALLTHROUGH;
|
1976
2294
|
case 4: XXH_PROCESS4;
|
1977
|
-
return XXH32_avalanche(
|
2295
|
+
return XXH32_avalanche(hash);
|
1978
2296
|
|
1979
2297
|
case 13: XXH_PROCESS4;
|
1980
2298
|
XXH_FALLTHROUGH;
|
@@ -1982,7 +2300,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
1982
2300
|
XXH_FALLTHROUGH;
|
1983
2301
|
case 5: XXH_PROCESS4;
|
1984
2302
|
XXH_PROCESS1;
|
1985
|
-
return XXH32_avalanche(
|
2303
|
+
return XXH32_avalanche(hash);
|
1986
2304
|
|
1987
2305
|
case 14: XXH_PROCESS4;
|
1988
2306
|
XXH_FALLTHROUGH;
|
@@ -1991,7 +2309,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
1991
2309
|
case 6: XXH_PROCESS4;
|
1992
2310
|
XXH_PROCESS1;
|
1993
2311
|
XXH_PROCESS1;
|
1994
|
-
return XXH32_avalanche(
|
2312
|
+
return XXH32_avalanche(hash);
|
1995
2313
|
|
1996
2314
|
case 15: XXH_PROCESS4;
|
1997
2315
|
XXH_FALLTHROUGH;
|
@@ -2005,10 +2323,10 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
2005
2323
|
XXH_FALLTHROUGH;
|
2006
2324
|
case 1: XXH_PROCESS1;
|
2007
2325
|
XXH_FALLTHROUGH;
|
2008
|
-
case 0: return XXH32_avalanche(
|
2326
|
+
case 0: return XXH32_avalanche(hash);
|
2009
2327
|
}
|
2010
2328
|
XXH_ASSERT(0);
|
2011
|
-
return
|
2329
|
+
return hash; /* reaching this point is deemed impossible */
|
2012
2330
|
}
|
2013
2331
|
}
|
2014
2332
|
|
@@ -2024,24 +2342,19 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
2024
2342
|
* @internal
|
2025
2343
|
* @brief The implementation for @ref XXH32().
|
2026
2344
|
*
|
2027
|
-
* @param input, len, seed Directly passed from @ref XXH32().
|
2345
|
+
* @param input , len , seed Directly passed from @ref XXH32().
|
2028
2346
|
* @param align Whether @p input is aligned.
|
2029
2347
|
* @return The calculated hash.
|
2030
2348
|
*/
|
2031
|
-
XXH_FORCE_INLINE xxh_u32
|
2349
|
+
XXH_FORCE_INLINE XXH_PUREF xxh_u32
|
2032
2350
|
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
|
2033
2351
|
{
|
2034
|
-
const xxh_u8* bEnd = input ? input + len : NULL;
|
2035
2352
|
xxh_u32 h32;
|
2036
2353
|
|
2037
|
-
|
2038
|
-
if (input==NULL) {
|
2039
|
-
len=0;
|
2040
|
-
bEnd=input=(const xxh_u8*)(size_t)16;
|
2041
|
-
}
|
2042
|
-
#endif
|
2354
|
+
if (input==NULL) XXH_ASSERT(len == 0);
|
2043
2355
|
|
2044
2356
|
if (len>=16) {
|
2357
|
+
const xxh_u8* const bEnd = input + len;
|
2045
2358
|
const xxh_u8* const limit = bEnd - 15;
|
2046
2359
|
xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
|
2047
2360
|
xxh_u32 v2 = seed + XXH_PRIME32_2;
|
@@ -2066,10 +2379,10 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
|
|
2066
2379
|
return XXH32_finalize(h32, input, len&15, align);
|
2067
2380
|
}
|
2068
2381
|
|
2069
|
-
/*! @ingroup
|
2382
|
+
/*! @ingroup XXH32_family */
|
2070
2383
|
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
|
2071
2384
|
{
|
2072
|
-
#if
|
2385
|
+
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
|
2073
2386
|
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
2074
2387
|
XXH32_state_t state;
|
2075
2388
|
XXH32_reset(&state, seed);
|
@@ -2088,51 +2401,46 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s
|
|
2088
2401
|
|
2089
2402
|
|
2090
2403
|
/******* Hash streaming *******/
|
2091
|
-
|
2092
|
-
|
2093
|
-
*/
|
2404
|
+
#ifndef XXH_NO_STREAM
|
2405
|
+
/*! @ingroup XXH32_family */
|
2094
2406
|
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
|
2095
2407
|
{
|
2096
2408
|
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
|
2097
2409
|
}
|
2098
|
-
/*! @ingroup
|
2410
|
+
/*! @ingroup XXH32_family */
|
2099
2411
|
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
|
2100
2412
|
{
|
2101
2413
|
XXH_free(statePtr);
|
2102
2414
|
return XXH_OK;
|
2103
2415
|
}
|
2104
2416
|
|
2105
|
-
/*! @ingroup
|
2417
|
+
/*! @ingroup XXH32_family */
|
2106
2418
|
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
|
2107
2419
|
{
|
2108
|
-
|
2420
|
+
XXH_memcpy(dstState, srcState, sizeof(*dstState));
|
2109
2421
|
}
|
2110
2422
|
|
2111
|
-
/*! @ingroup
|
2423
|
+
/*! @ingroup XXH32_family */
|
2112
2424
|
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
|
2113
2425
|
{
|
2114
|
-
|
2115
|
-
memset(
|
2116
|
-
|
2117
|
-
|
2118
|
-
|
2119
|
-
|
2120
|
-
/* do not write into reserved, planned to be removed in a future version */
|
2121
|
-
memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
|
2426
|
+
XXH_ASSERT(statePtr != NULL);
|
2427
|
+
memset(statePtr, 0, sizeof(*statePtr));
|
2428
|
+
statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
|
2429
|
+
statePtr->v[1] = seed + XXH_PRIME32_2;
|
2430
|
+
statePtr->v[2] = seed + 0;
|
2431
|
+
statePtr->v[3] = seed - XXH_PRIME32_1;
|
2122
2432
|
return XXH_OK;
|
2123
2433
|
}
|
2124
2434
|
|
2125
2435
|
|
2126
|
-
/*! @ingroup
|
2436
|
+
/*! @ingroup XXH32_family */
|
2127
2437
|
XXH_PUBLIC_API XXH_errorcode
|
2128
2438
|
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
2129
2439
|
{
|
2130
|
-
if (input==NULL)
|
2131
|
-
|
2440
|
+
if (input==NULL) {
|
2441
|
+
XXH_ASSERT(len == 0);
|
2132
2442
|
return XXH_OK;
|
2133
|
-
|
2134
|
-
return XXH_ERROR;
|
2135
|
-
#endif
|
2443
|
+
}
|
2136
2444
|
|
2137
2445
|
{ const xxh_u8* p = (const xxh_u8*)input;
|
2138
2446
|
const xxh_u8* const bEnd = p + len;
|
@@ -2149,10 +2457,10 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
|
2149
2457
|
if (state->memsize) { /* some data left from previous update */
|
2150
2458
|
XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
|
2151
2459
|
{ const xxh_u32* p32 = state->mem32;
|
2152
|
-
state->
|
2153
|
-
state->
|
2154
|
-
state->
|
2155
|
-
state->
|
2460
|
+
state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
|
2461
|
+
state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
|
2462
|
+
state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
|
2463
|
+
state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
|
2156
2464
|
}
|
2157
2465
|
p += 16-state->memsize;
|
2158
2466
|
state->memsize = 0;
|
@@ -2160,22 +2468,14 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
|
2160
2468
|
|
2161
2469
|
if (p <= bEnd-16) {
|
2162
2470
|
const xxh_u8* const limit = bEnd - 16;
|
2163
|
-
xxh_u32 v1 = state->v1;
|
2164
|
-
xxh_u32 v2 = state->v2;
|
2165
|
-
xxh_u32 v3 = state->v3;
|
2166
|
-
xxh_u32 v4 = state->v4;
|
2167
2471
|
|
2168
2472
|
do {
|
2169
|
-
|
2170
|
-
|
2171
|
-
|
2172
|
-
|
2473
|
+
state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
|
2474
|
+
state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
|
2475
|
+
state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
|
2476
|
+
state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
|
2173
2477
|
} while (p<=limit);
|
2174
2478
|
|
2175
|
-
state->v1 = v1;
|
2176
|
-
state->v2 = v2;
|
2177
|
-
state->v3 = v3;
|
2178
|
-
state->v4 = v4;
|
2179
2479
|
}
|
2180
2480
|
|
2181
2481
|
if (p < bEnd) {
|
@@ -2188,30 +2488,30 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
|
2188
2488
|
}
|
2189
2489
|
|
2190
2490
|
|
2191
|
-
/*! @ingroup
|
2491
|
+
/*! @ingroup XXH32_family */
|
2192
2492
|
XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
|
2193
2493
|
{
|
2194
2494
|
xxh_u32 h32;
|
2195
2495
|
|
2196
2496
|
if (state->large_len) {
|
2197
|
-
h32 = XXH_rotl32(state->
|
2198
|
-
+ XXH_rotl32(state->
|
2199
|
-
+ XXH_rotl32(state->
|
2200
|
-
+ XXH_rotl32(state->
|
2497
|
+
h32 = XXH_rotl32(state->v[0], 1)
|
2498
|
+
+ XXH_rotl32(state->v[1], 7)
|
2499
|
+
+ XXH_rotl32(state->v[2], 12)
|
2500
|
+
+ XXH_rotl32(state->v[3], 18);
|
2201
2501
|
} else {
|
2202
|
-
h32 = state->
|
2502
|
+
h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
|
2203
2503
|
}
|
2204
2504
|
|
2205
2505
|
h32 += state->total_len_32;
|
2206
2506
|
|
2207
2507
|
return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
|
2208
2508
|
}
|
2209
|
-
|
2509
|
+
#endif /* !XXH_NO_STREAM */
|
2210
2510
|
|
2211
2511
|
/******* Canonical representation *******/
|
2212
2512
|
|
2213
2513
|
/*!
|
2214
|
-
* @ingroup
|
2514
|
+
* @ingroup XXH32_family
|
2215
2515
|
* The default return values from XXH functions are unsigned 32 and 64 bit
|
2216
2516
|
* integers.
|
2217
2517
|
*
|
@@ -2228,9 +2528,9 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
|
|
2228
2528
|
{
|
2229
2529
|
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
|
2230
2530
|
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
|
2231
|
-
|
2531
|
+
XXH_memcpy(dst, &hash, sizeof(*dst));
|
2232
2532
|
}
|
2233
|
-
/*! @ingroup
|
2533
|
+
/*! @ingroup XXH32_family */
|
2234
2534
|
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
|
2235
2535
|
{
|
2236
2536
|
return XXH_readBE32(src);
|
@@ -2271,30 +2571,31 @@ static xxh_u64 XXH_read64(const void* memPtr)
|
|
2271
2571
|
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
|
2272
2572
|
|
2273
2573
|
/*
|
2274
|
-
*
|
2275
|
-
*
|
2276
|
-
*
|
2277
|
-
*
|
2574
|
+
* __attribute__((aligned(1))) is supported by gcc and clang. Originally the
|
2575
|
+
* documentation claimed that it only increased the alignment, but actually it
|
2576
|
+
* can decrease it on gcc, clang, and icc:
|
2577
|
+
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
|
2578
|
+
* https://gcc.godbolt.org/z/xYez1j67Y.
|
2278
2579
|
*/
|
2279
2580
|
#ifdef XXH_OLD_NAMES
|
2280
2581
|
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
|
2281
2582
|
#endif
|
2282
2583
|
static xxh_u64 XXH_read64(const void* ptr)
|
2283
2584
|
{
|
2284
|
-
typedef
|
2285
|
-
return ((const xxh_unalign64*)ptr)
|
2585
|
+
typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
|
2586
|
+
return *((const xxh_unalign64*)ptr);
|
2286
2587
|
}
|
2287
2588
|
|
2288
2589
|
#else
|
2289
2590
|
|
2290
2591
|
/*
|
2291
2592
|
* Portable and safe solution. Generally efficient.
|
2292
|
-
* see:
|
2593
|
+
* see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
|
2293
2594
|
*/
|
2294
2595
|
static xxh_u64 XXH_read64(const void* memPtr)
|
2295
2596
|
{
|
2296
2597
|
xxh_u64 val;
|
2297
|
-
|
2598
|
+
XXH_memcpy(&val, memPtr, sizeof(val));
|
2298
2599
|
return val;
|
2299
2600
|
}
|
2300
2601
|
|
@@ -2373,8 +2674,10 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
|
|
2373
2674
|
/******* xxh64 *******/
|
2374
2675
|
/*!
|
2375
2676
|
* @}
|
2376
|
-
* @defgroup
|
2677
|
+
* @defgroup XXH64_impl XXH64 implementation
|
2377
2678
|
* @ingroup impl
|
2679
|
+
*
|
2680
|
+
* Details on the XXH64 implementation.
|
2378
2681
|
* @{
|
2379
2682
|
*/
|
2380
2683
|
/* #define rather than static const, to be used as initializers */
|
@@ -2392,6 +2695,7 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
|
|
2392
2695
|
# define PRIME64_5 XXH_PRIME64_5
|
2393
2696
|
#endif
|
2394
2697
|
|
2698
|
+
/*! @copydoc XXH32_round */
|
2395
2699
|
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
|
2396
2700
|
{
|
2397
2701
|
acc += input * XXH_PRIME64_2;
|
@@ -2408,42 +2712,59 @@ static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
|
|
2408
2712
|
return acc;
|
2409
2713
|
}
|
2410
2714
|
|
2411
|
-
|
2715
|
+
/*! @copydoc XXH32_avalanche */
|
2716
|
+
static xxh_u64 XXH64_avalanche(xxh_u64 hash)
|
2412
2717
|
{
|
2413
|
-
|
2414
|
-
|
2415
|
-
|
2416
|
-
|
2417
|
-
|
2418
|
-
return
|
2718
|
+
hash ^= hash >> 33;
|
2719
|
+
hash *= XXH_PRIME64_2;
|
2720
|
+
hash ^= hash >> 29;
|
2721
|
+
hash *= XXH_PRIME64_3;
|
2722
|
+
hash ^= hash >> 32;
|
2723
|
+
return hash;
|
2419
2724
|
}
|
2420
2725
|
|
2421
2726
|
|
2422
2727
|
#define XXH_get64bits(p) XXH_readLE64_align(p, align)
|
2423
2728
|
|
2424
|
-
|
2425
|
-
|
2729
|
+
/*!
|
2730
|
+
* @internal
|
2731
|
+
* @brief Processes the last 0-31 bytes of @p ptr.
|
2732
|
+
*
|
2733
|
+
* There may be up to 31 bytes remaining to consume from the input.
|
2734
|
+
* This final stage will digest them to ensure that all input bytes are present
|
2735
|
+
* in the final mix.
|
2736
|
+
*
|
2737
|
+
* @param hash The hash to finalize.
|
2738
|
+
* @param ptr The pointer to the remaining input.
|
2739
|
+
* @param len The remaining length, modulo 32.
|
2740
|
+
* @param align Whether @p ptr is aligned.
|
2741
|
+
* @return The finalized hash.
|
2742
|
+
* @see XXH32_finalize().
|
2743
|
+
*/
|
2744
|
+
static XXH_PUREF xxh_u64
|
2745
|
+
XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
2426
2746
|
{
|
2747
|
+
if (ptr==NULL) XXH_ASSERT(len == 0);
|
2427
2748
|
len &= 31;
|
2428
2749
|
while (len >= 8) {
|
2429
2750
|
xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
|
2430
2751
|
ptr += 8;
|
2431
|
-
|
2432
|
-
|
2752
|
+
hash ^= k1;
|
2753
|
+
hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
|
2433
2754
|
len -= 8;
|
2434
2755
|
}
|
2435
2756
|
if (len >= 4) {
|
2436
|
-
|
2757
|
+
hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
|
2437
2758
|
ptr += 4;
|
2438
|
-
|
2759
|
+
hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
|
2439
2760
|
len -= 4;
|
2440
2761
|
}
|
2441
2762
|
while (len > 0) {
|
2442
|
-
|
2443
|
-
|
2763
|
+
hash ^= (*ptr++) * XXH_PRIME64_5;
|
2764
|
+
hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
|
2444
2765
|
--len;
|
2445
2766
|
}
|
2446
|
-
return XXH64_avalanche(
|
2767
|
+
return XXH64_avalanche(hash);
|
2447
2768
|
}
|
2448
2769
|
|
2449
2770
|
#ifdef XXH_OLD_NAMES
|
@@ -2456,21 +2777,23 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
2456
2777
|
# undef XXH_PROCESS8_64
|
2457
2778
|
#endif
|
2458
2779
|
|
2459
|
-
|
2780
|
+
/*!
|
2781
|
+
* @internal
|
2782
|
+
* @brief The implementation for @ref XXH64().
|
2783
|
+
*
|
2784
|
+
* @param input , len , seed Directly passed from @ref XXH64().
|
2785
|
+
* @param align Whether @p input is aligned.
|
2786
|
+
* @return The calculated hash.
|
2787
|
+
*/
|
2788
|
+
XXH_FORCE_INLINE XXH_PUREF xxh_u64
|
2460
2789
|
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
|
2461
2790
|
{
|
2462
|
-
const xxh_u8* bEnd = input ? input + len : NULL;
|
2463
2791
|
xxh_u64 h64;
|
2464
|
-
|
2465
|
-
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
|
2466
|
-
if (input==NULL) {
|
2467
|
-
len=0;
|
2468
|
-
bEnd=input=(const xxh_u8*)(size_t)32;
|
2469
|
-
}
|
2470
|
-
#endif
|
2792
|
+
if (input==NULL) XXH_ASSERT(len == 0);
|
2471
2793
|
|
2472
2794
|
if (len>=32) {
|
2473
|
-
const xxh_u8* const
|
2795
|
+
const xxh_u8* const bEnd = input + len;
|
2796
|
+
const xxh_u8* const limit = bEnd - 31;
|
2474
2797
|
xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
|
2475
2798
|
xxh_u64 v2 = seed + XXH_PRIME64_2;
|
2476
2799
|
xxh_u64 v3 = seed + 0;
|
@@ -2481,7 +2804,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
|
|
2481
2804
|
v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
|
2482
2805
|
v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
|
2483
2806
|
v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
|
2484
|
-
} while (input
|
2807
|
+
} while (input<limit);
|
2485
2808
|
|
2486
2809
|
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
2487
2810
|
h64 = XXH64_mergeRound(h64, v1);
|
@@ -2499,10 +2822,10 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
|
|
2499
2822
|
}
|
2500
2823
|
|
2501
2824
|
|
2502
|
-
/*! @ingroup
|
2825
|
+
/*! @ingroup XXH64_family */
|
2503
2826
|
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
|
2504
2827
|
{
|
2505
|
-
#if
|
2828
|
+
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
|
2506
2829
|
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
2507
2830
|
XXH64_state_t state;
|
2508
2831
|
XXH64_reset(&state, seed);
|
@@ -2520,49 +2843,45 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
|
|
2520
2843
|
}
|
2521
2844
|
|
2522
2845
|
/******* Hash Streaming *******/
|
2523
|
-
|
2524
|
-
/*! @ingroup
|
2846
|
+
#ifndef XXH_NO_STREAM
|
2847
|
+
/*! @ingroup XXH64_family */
|
2525
2848
|
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
|
2526
2849
|
{
|
2527
2850
|
return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
|
2528
2851
|
}
|
2529
|
-
/*! @ingroup
|
2852
|
+
/*! @ingroup XXH64_family */
|
2530
2853
|
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
|
2531
2854
|
{
|
2532
2855
|
XXH_free(statePtr);
|
2533
2856
|
return XXH_OK;
|
2534
2857
|
}
|
2535
2858
|
|
2536
|
-
/*! @ingroup
|
2859
|
+
/*! @ingroup XXH64_family */
|
2537
2860
|
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
|
2538
2861
|
{
|
2539
|
-
|
2862
|
+
XXH_memcpy(dstState, srcState, sizeof(*dstState));
|
2540
2863
|
}
|
2541
2864
|
|
2542
|
-
/*! @ingroup
|
2865
|
+
/*! @ingroup XXH64_family */
|
2543
2866
|
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
|
2544
2867
|
{
|
2545
|
-
|
2546
|
-
memset(
|
2547
|
-
|
2548
|
-
|
2549
|
-
|
2550
|
-
|
2551
|
-
/* do not write into reserved64, might be removed in a future version */
|
2552
|
-
memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
|
2868
|
+
XXH_ASSERT(statePtr != NULL);
|
2869
|
+
memset(statePtr, 0, sizeof(*statePtr));
|
2870
|
+
statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
|
2871
|
+
statePtr->v[1] = seed + XXH_PRIME64_2;
|
2872
|
+
statePtr->v[2] = seed + 0;
|
2873
|
+
statePtr->v[3] = seed - XXH_PRIME64_1;
|
2553
2874
|
return XXH_OK;
|
2554
2875
|
}
|
2555
2876
|
|
2556
|
-
/*! @ingroup
|
2877
|
+
/*! @ingroup XXH64_family */
|
2557
2878
|
XXH_PUBLIC_API XXH_errorcode
|
2558
2879
|
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
|
2559
2880
|
{
|
2560
|
-
if (input==NULL)
|
2561
|
-
|
2881
|
+
if (input==NULL) {
|
2882
|
+
XXH_ASSERT(len == 0);
|
2562
2883
|
return XXH_OK;
|
2563
|
-
|
2564
|
-
return XXH_ERROR;
|
2565
|
-
#endif
|
2884
|
+
}
|
2566
2885
|
|
2567
2886
|
{ const xxh_u8* p = (const xxh_u8*)input;
|
2568
2887
|
const xxh_u8* const bEnd = p + len;
|
@@ -2577,32 +2896,24 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
|
|
2577
2896
|
|
2578
2897
|
if (state->memsize) { /* some data left from previous update: top up the tmp buffer and consume it */
|
2579
2898
|
XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
|
2580
|
-
state->
|
2581
|
-
state->
|
2582
|
-
state->
|
2583
|
-
state->
|
2899
|
+
state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
|
2900
|
+
state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
|
2901
|
+
state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
|
2902
|
+
state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
|
2584
2903
|
p += 32 - state->memsize;
|
2585
2904
|
state->memsize = 0;
|
2586
2905
|
}
|
2587
2906
|
|
2588
2907
|
if (p+32 <= bEnd) {
|
2589
2908
|
const xxh_u8* const limit = bEnd - 32;
|
2590
|
-
xxh_u64 v1 = state->v1;
|
2591
|
-
xxh_u64 v2 = state->v2;
|
2592
|
-
xxh_u64 v3 = state->v3;
|
2593
|
-
xxh_u64 v4 = state->v4;
|
2594
2909
|
|
2595
2910
|
do {
|
2596
|
-
|
2597
|
-
|
2598
|
-
|
2599
|
-
|
2911
|
+
state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
|
2912
|
+
state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
|
2913
|
+
state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
|
2914
|
+
state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
|
2600
2915
|
} while (p<=limit);
|
2601
2916
|
|
2602
|
-
state->v1 = v1;
|
2603
|
-
state->v2 = v2;
|
2604
|
-
state->v3 = v3;
|
2605
|
-
state->v4 = v4;
|
2606
2917
|
}
|
2607
2918
|
|
2608
2919
|
if (p < bEnd) {
|
@@ -2615,43 +2926,38 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
|
|
2615
2926
|
}
|
2616
2927
|
|
2617
2928
|
|
2618
|
-
/*! @ingroup
|
2929
|
+
/*! @ingroup XXH64_family */
|
2619
2930
|
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
|
2620
2931
|
{
|
2621
2932
|
xxh_u64 h64;
|
2622
2933
|
|
2623
2934
|
if (state->total_len >= 32) {
|
2624
|
-
|
2625
|
-
|
2626
|
-
|
2627
|
-
|
2628
|
-
|
2629
|
-
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
2630
|
-
h64 = XXH64_mergeRound(h64, v1);
|
2631
|
-
h64 = XXH64_mergeRound(h64, v2);
|
2632
|
-
h64 = XXH64_mergeRound(h64, v3);
|
2633
|
-
h64 = XXH64_mergeRound(h64, v4);
|
2935
|
+
h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
|
2936
|
+
h64 = XXH64_mergeRound(h64, state->v[0]);
|
2937
|
+
h64 = XXH64_mergeRound(h64, state->v[1]);
|
2938
|
+
h64 = XXH64_mergeRound(h64, state->v[2]);
|
2939
|
+
h64 = XXH64_mergeRound(h64, state->v[3]);
|
2634
2940
|
} else {
|
2635
|
-
h64 = state->
|
2941
|
+
h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
|
2636
2942
|
}
|
2637
2943
|
|
2638
2944
|
h64 += (xxh_u64) state->total_len;
|
2639
2945
|
|
2640
2946
|
return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
|
2641
2947
|
}
|
2642
|
-
|
2948
|
+
#endif /* !XXH_NO_STREAM */
|
2643
2949
|
|
2644
2950
|
/******* Canonical representation *******/
|
2645
2951
|
|
2646
|
-
/*! @ingroup
|
2952
|
+
/*! @ingroup XXH64_family */
|
2647
2953
|
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
|
2648
2954
|
{
|
2649
2955
|
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
|
2650
2956
|
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
|
2651
|
-
|
2957
|
+
XXH_memcpy(dst, &hash, sizeof(*dst));
|
2652
2958
|
}
|
2653
2959
|
|
2654
|
-
/*! @ingroup
|
2960
|
+
/*! @ingroup XXH64_family */
|
2655
2961
|
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
|
2656
2962
|
{
|
2657
2963
|
return XXH_readBE64(src);
|
@@ -2665,7 +2971,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
2665
2971
|
************************************************************************ */
|
2666
2972
|
/*!
|
2667
2973
|
* @}
|
2668
|
-
* @defgroup
|
2974
|
+
* @defgroup XXH3_impl XXH3 implementation
|
2669
2975
|
* @ingroup impl
|
2670
2976
|
* @{
|
2671
2977
|
*/
|
@@ -2691,17 +2997,21 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
2691
2997
|
# define XXH_unlikely(x) (x)
|
2692
2998
|
#endif
|
2693
2999
|
|
2694
|
-
#if defined(__GNUC__)
|
2695
|
-
# if defined(
|
2696
|
-
|
2697
|
-
|
2698
|
-
# include <emmintrin.h>
|
2699
|
-
# elif defined(__ARM_NEON__) || defined(__ARM_NEON)
|
3000
|
+
#if defined(__GNUC__) || defined(__clang__)
|
3001
|
+
# if defined(__ARM_NEON__) || defined(__ARM_NEON) \
|
3002
|
+
|| defined(__aarch64__) || defined(_M_ARM) \
|
3003
|
+
|| defined(_M_ARM64) || defined(_M_ARM64EC)
|
2700
3004
|
# define inline __inline__ /* circumvent a clang bug */
|
2701
3005
|
# include <arm_neon.h>
|
2702
3006
|
# undef inline
|
3007
|
+
# elif defined(__AVX2__)
|
3008
|
+
# include <immintrin.h>
|
3009
|
+
# elif defined(__SSE2__)
|
3010
|
+
# include <emmintrin.h>
|
2703
3011
|
# endif
|
2704
|
-
#
|
3012
|
+
#endif
|
3013
|
+
|
3014
|
+
#if defined(_MSC_VER)
|
2705
3015
|
# include <intrin.h>
|
2706
3016
|
#endif
|
2707
3017
|
|
@@ -2839,17 +3149,20 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
2839
3149
|
#endif
|
2840
3150
|
|
2841
3151
|
#ifndef XXH_VECTOR /* can be defined on command line */
|
2842
|
-
# if
|
3152
|
+
# if ( \
|
3153
|
+
defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
|
3154
|
+
|| defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
|
3155
|
+
) && ( \
|
3156
|
+
defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
|
3157
|
+
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
|
3158
|
+
)
|
3159
|
+
# define XXH_VECTOR XXH_NEON
|
3160
|
+
# elif defined(__AVX512F__)
|
2843
3161
|
# define XXH_VECTOR XXH_AVX512
|
2844
3162
|
# elif defined(__AVX2__)
|
2845
3163
|
# define XXH_VECTOR XXH_AVX2
|
2846
3164
|
# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
|
2847
3165
|
# define XXH_VECTOR XXH_SSE2
|
2848
|
-
# elif defined(__GNUC__) /* msvc support maybe later */ \
|
2849
|
-
&& (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
|
2850
|
-
&& (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
|
2851
|
-
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
|
2852
|
-
# define XXH_VECTOR XXH_NEON
|
2853
3166
|
# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
|
2854
3167
|
|| (defined(__s390x__) && defined(__VEC__)) \
|
2855
3168
|
&& defined(__GNUC__) /* TODO: IBM XL */
|
@@ -2911,7 +3224,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
2911
3224
|
*/
|
2912
3225
|
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
|
2913
3226
|
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
|
2914
|
-
&& defined(__OPTIMIZE__) &&
|
3227
|
+
&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
|
2915
3228
|
# pragma GCC push_options
|
2916
3229
|
# pragma GCC optimize("-O2")
|
2917
3230
|
#endif
|
@@ -2999,8 +3312,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
2999
3312
|
* }
|
3000
3313
|
*/
|
3001
3314
|
# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
|
3002
|
-
&& defined(__GNUC__) \
|
3003
|
-
&&
|
3315
|
+
&& (defined(__GNUC__) || defined(__clang__)) \
|
3316
|
+
&& (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
|
3004
3317
|
# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
|
3005
3318
|
do { \
|
3006
3319
|
/* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
|
@@ -3017,6 +3330,76 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
3017
3330
|
(outHi) = vshrn_n_u64 ((in), 32); \
|
3018
3331
|
} while (0)
|
3019
3332
|
# endif
|
3333
|
+
|
3334
|
+
/*!
|
3335
|
+
* @internal
|
3336
|
+
* @brief `vld1q_u64` but faster and alignment-safe.
|
3337
|
+
*
|
3338
|
+
* On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
|
3339
|
+
* *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
|
3340
|
+
*
|
3341
|
+
* GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
|
3342
|
+
* prohibits load-store optimizations. Therefore, a direct dereference is used.
|
3343
|
+
*
|
3344
|
+
* Otherwise, `vld1q_u8` is used with `vreinterpretq_u64_u8` to do a safe
|
3345
|
+
* unaligned load.
|
3346
|
+
*/
|
3347
|
+
#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
|
3348
|
+
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
|
3349
|
+
{
|
3350
|
+
return *(uint64x2_t const*)ptr;
|
3351
|
+
}
|
3352
|
+
#else
|
3353
|
+
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
|
3354
|
+
{
|
3355
|
+
return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
|
3356
|
+
}
|
3357
|
+
#endif
|
3358
|
+
/*!
|
3359
|
+
* @ingroup tuning
|
3360
|
+
* @brief Controls the NEON to scalar ratio for XXH3
|
3361
|
+
*
|
3362
|
+
* On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
|
3363
|
+
* 2 lanes on scalar by default.
|
3364
|
+
*
|
3365
|
+
* This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
|
3366
|
+
* emulated 64-bit arithmetic is too slow.
|
3367
|
+
*
|
3368
|
+
* Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
|
3369
|
+
*
|
3370
|
+
* For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
|
3371
|
+
* have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
|
3372
|
+
* you are only using 2/3 of the CPU bandwidth.
|
3373
|
+
*
|
3374
|
+
* This is even more noticeable on the more advanced cores like the A76 which
|
3375
|
+
* can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
|
3376
|
+
*
|
3377
|
+
* Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
|
3378
|
+
* remaining lanes will use scalar instructions. This improves the bandwidth
|
3379
|
+
* and also gives the integer pipelines something to do besides twiddling loop
|
3380
|
+
* counters and pointers.
|
3381
|
+
*
|
3382
|
+
* This change benefits CPUs with large micro-op buffers without negatively affecting
|
3383
|
+
* other CPUs:
|
3384
|
+
*
|
3385
|
+
* | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
|
3386
|
+
* |:----------------------|:--------------------|----------:|-----------:|------:|
|
3387
|
+
* | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
|
3388
|
+
* | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
|
3389
|
+
* | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
|
3390
|
+
*
|
3391
|
+
* It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
|
3392
|
+
*
|
3393
|
+
* @see XXH3_accumulate_512_neon()
|
3394
|
+
*/
|
3395
|
+
# ifndef XXH3_NEON_LANES
|
3396
|
+
# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
|
3397
|
+
&& XXH_SIZE_OPT <= 0
|
3398
|
+
# define XXH3_NEON_LANES 6
|
3399
|
+
# else
|
3400
|
+
# define XXH3_NEON_LANES XXH_ACC_NB
|
3401
|
+
# endif
|
3402
|
+
# endif
|
3020
3403
|
#endif /* XXH_VECTOR == XXH_NEON */
|
3021
3404
|
|
3022
3405
|
/*
|
@@ -3028,23 +3411,33 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
3028
3411
|
* inconsistent intrinsics, spotty coverage, and multiple endiannesses.
|
3029
3412
|
*/
|
3030
3413
|
#if XXH_VECTOR == XXH_VSX
|
3414
|
+
/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
|
3415
|
+
* and `pixel`. This is a problem for obvious reasons.
|
3416
|
+
*
|
3417
|
+
* These keywords are unnecessary; the spec literally says they are
|
3418
|
+
* equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
|
3419
|
+
* after including the header.
|
3420
|
+
*
|
3421
|
+
* We use pragma push_macro/pop_macro to keep the namespace clean. */
|
3422
|
+
# pragma push_macro("bool")
|
3423
|
+
# pragma push_macro("vector")
|
3424
|
+
# pragma push_macro("pixel")
|
3425
|
+
/* silence potential macro redefined warnings */
|
3426
|
+
# undef bool
|
3427
|
+
# undef vector
|
3428
|
+
# undef pixel
|
3429
|
+
|
3031
3430
|
# if defined(__s390x__)
|
3032
3431
|
# include <s390intrin.h>
|
3033
3432
|
# else
|
3034
|
-
/* gcc's altivec.h can have the unwanted consequence to unconditionally
|
3035
|
-
* #define bool, vector, and pixel keywords,
|
3036
|
-
* with bad consequences for programs already using these keywords for other purposes.
|
3037
|
-
* The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
|
3038
|
-
* __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
|
3039
|
-
* but it seems that, in some cases, it isn't.
|
3040
|
-
* Force the build macro to be defined, so that keywords are not altered.
|
3041
|
-
*/
|
3042
|
-
# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
|
3043
|
-
# define __APPLE_ALTIVEC__
|
3044
|
-
# endif
|
3045
3433
|
# include <altivec.h>
|
3046
3434
|
# endif
|
3047
3435
|
|
3436
|
+
/* Restore the original macro values, if applicable. */
|
3437
|
+
# pragma pop_macro("pixel")
|
3438
|
+
# pragma pop_macro("vector")
|
3439
|
+
# pragma pop_macro("bool")
|
3440
|
+
|
3048
3441
|
typedef __vector unsigned long long xxh_u64x2;
|
3049
3442
|
typedef __vector unsigned char xxh_u8x16;
|
3050
3443
|
typedef __vector unsigned xxh_u32x4;
|
@@ -3083,7 +3476,7 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
|
|
3083
3476
|
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
|
3084
3477
|
{
|
3085
3478
|
xxh_u64x2 ret;
|
3086
|
-
|
3479
|
+
XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
|
3087
3480
|
# if XXH_VSX_BE
|
3088
3481
|
ret = XXH_vec_revb(ret);
|
3089
3482
|
# endif
|
@@ -3128,7 +3521,9 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
|
|
3128
3521
|
#if defined(XXH_NO_PREFETCH)
|
3129
3522
|
# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
|
3130
3523
|
#else
|
3131
|
-
# if
|
3524
|
+
# if XXH_SIZE_OPT >= 1
|
3525
|
+
# define XXH_PREFETCH(ptr) (void)(ptr)
|
3526
|
+
# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
|
3132
3527
|
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
3133
3528
|
# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
3134
3529
|
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
@@ -3193,7 +3588,6 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
3193
3588
|
return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
|
3194
3589
|
}
|
3195
3590
|
#elif defined(_MSC_VER) && defined(_M_IX86)
|
3196
|
-
# include <intrin.h>
|
3197
3591
|
# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
|
3198
3592
|
#else
|
3199
3593
|
/*
|
@@ -3212,7 +3606,7 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
3212
3606
|
* Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
|
3213
3607
|
* version.
|
3214
3608
|
*
|
3215
|
-
* @param lhs, rhs The 64-bit integers to be multiplied
|
3609
|
+
* @param lhs , rhs The 64-bit integers to be multiplied
|
3216
3610
|
* @return The 128-bit result represented in an @ref XXH128_hash_t.
|
3217
3611
|
*/
|
3218
3612
|
static XXH128_hash_t
|
@@ -3233,7 +3627,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3233
3627
|
* In that case it is best to use the portable one.
|
3234
3628
|
* https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
|
3235
3629
|
*/
|
3236
|
-
#if defined(__GNUC__) && !defined(__wasm__) \
|
3630
|
+
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
|
3237
3631
|
&& defined(__SIZEOF_INT128__) \
|
3238
3632
|
|| (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
|
3239
3633
|
|
@@ -3250,7 +3644,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3250
3644
|
*
|
3251
3645
|
* This compiles to single operand MUL on x64.
|
3252
3646
|
*/
|
3253
|
-
#elif defined(_M_X64) || defined(_M_IA64)
|
3647
|
+
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
|
3254
3648
|
|
3255
3649
|
#ifndef _MSC_VER
|
3256
3650
|
# pragma intrinsic(_umul128)
|
@@ -3262,6 +3656,21 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3262
3656
|
r128.high64 = product_high;
|
3263
3657
|
return r128;
|
3264
3658
|
|
3659
|
+
/*
|
3660
|
+
* MSVC for ARM64's __umulh method.
|
3661
|
+
*
|
3662
|
+
* This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
|
3663
|
+
*/
|
3664
|
+
#elif defined(_M_ARM64) || defined(_M_ARM64EC)
|
3665
|
+
|
3666
|
+
#ifndef _MSC_VER
|
3667
|
+
# pragma intrinsic(__umulh)
|
3668
|
+
#endif
|
3669
|
+
XXH128_hash_t r128;
|
3670
|
+
r128.low64 = lhs * rhs;
|
3671
|
+
r128.high64 = __umulh(lhs, rhs);
|
3672
|
+
return r128;
|
3673
|
+
|
3265
3674
|
#else
|
3266
3675
|
/*
|
3267
3676
|
* Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
|
@@ -3330,7 +3739,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
3330
3739
|
* The reason for the separate function is to prevent passing too many structs
|
3331
3740
|
* around by value. This will hopefully inline the multiply, but we don't force it.
|
3332
3741
|
*
|
3333
|
-
* @param lhs, rhs The 64-bit integers to multiply
|
3742
|
+
* @param lhs , rhs The 64-bit integers to multiply
|
3334
3743
|
* @return The low 64 bits of the product XOR'd by the high 64 bits.
|
3335
3744
|
* @see XXH_mult64to128()
|
3336
3745
|
*/
|
@@ -3342,7 +3751,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
|
|
3342
3751
|
}
|
3343
3752
|
|
3344
3753
|
/*! Seems to produce slightly better code on GCC for some reason. */
|
3345
|
-
XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
|
3754
|
+
XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
|
3346
3755
|
{
|
3347
3756
|
XXH_ASSERT(0 <= shift && shift < 64);
|
3348
3757
|
return v64 ^ (v64 >> shift);
|
@@ -3409,7 +3818,7 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
|
|
3409
3818
|
*
|
3410
3819
|
* This adds an extra layer of strength for custom secrets.
|
3411
3820
|
*/
|
3412
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
3821
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
3413
3822
|
XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
3414
3823
|
{
|
3415
3824
|
XXH_ASSERT(input != NULL);
|
@@ -3431,7 +3840,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
|
|
3431
3840
|
}
|
3432
3841
|
}
|
3433
3842
|
|
3434
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
3843
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
3435
3844
|
XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
3436
3845
|
{
|
3437
3846
|
XXH_ASSERT(input != NULL);
|
@@ -3447,7 +3856,7 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
|
|
3447
3856
|
}
|
3448
3857
|
}
|
3449
3858
|
|
3450
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
3859
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
3451
3860
|
XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
3452
3861
|
{
|
3453
3862
|
XXH_ASSERT(input != NULL);
|
@@ -3464,7 +3873,7 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
|
|
3464
3873
|
}
|
3465
3874
|
}
|
3466
3875
|
|
3467
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
3876
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
3468
3877
|
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
3469
3878
|
{
|
3470
3879
|
XXH_ASSERT(len <= 16);
|
@@ -3534,7 +3943,7 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
|
|
3534
3943
|
}
|
3535
3944
|
|
3536
3945
|
/* For mid range keys, XXH3 uses a Mum-hash variant. */
|
3537
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
3946
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
3538
3947
|
XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
3539
3948
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
3540
3949
|
XXH64_hash_t seed)
|
@@ -3543,6 +3952,14 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
3543
3952
|
XXH_ASSERT(16 < len && len <= 128);
|
3544
3953
|
|
3545
3954
|
{ xxh_u64 acc = len * XXH_PRIME64_1;
|
3955
|
+
#if XXH_SIZE_OPT >= 1
|
3956
|
+
/* Smaller and cleaner, but slightly slower. */
|
3957
|
+
size_t i = (len - 1) / 32;
|
3958
|
+
do {
|
3959
|
+
acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
|
3960
|
+
acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
|
3961
|
+
} while (i-- != 0);
|
3962
|
+
#else
|
3546
3963
|
if (len > 32) {
|
3547
3964
|
if (len > 64) {
|
3548
3965
|
if (len > 96) {
|
@@ -3557,14 +3974,14 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
3557
3974
|
}
|
3558
3975
|
acc += XXH3_mix16B(input+0, secret+0, seed);
|
3559
3976
|
acc += XXH3_mix16B(input+len-16, secret+16, seed);
|
3560
|
-
|
3977
|
+
#endif
|
3561
3978
|
return XXH3_avalanche(acc);
|
3562
3979
|
}
|
3563
3980
|
}
|
3564
3981
|
|
3565
3982
|
#define XXH3_MIDSIZE_MAX 240
|
3566
3983
|
|
3567
|
-
XXH_NO_INLINE XXH64_hash_t
|
3984
|
+
XXH_NO_INLINE XXH_PUREF XXH64_hash_t
|
3568
3985
|
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
3569
3986
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
3570
3987
|
XXH64_hash_t seed)
|
@@ -3632,7 +4049,7 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
3632
4049
|
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
|
3633
4050
|
{
|
3634
4051
|
if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
|
3635
|
-
|
4052
|
+
XXH_memcpy(dst, &v64, sizeof(v64));
|
3636
4053
|
}
|
3637
4054
|
|
3638
4055
|
/* Several intrinsic functions below are supposed to accept __int64 as argument,
|
@@ -3649,6 +4066,7 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
|
|
3649
4066
|
typedef long long xxh_i64;
|
3650
4067
|
#endif
|
3651
4068
|
|
4069
|
+
|
3652
4070
|
/*
|
3653
4071
|
* XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
|
3654
4072
|
*
|
@@ -3684,7 +4102,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
|
|
3684
4102
|
const void* XXH_RESTRICT input,
|
3685
4103
|
const void* XXH_RESTRICT secret)
|
3686
4104
|
{
|
3687
|
-
|
4105
|
+
__m512i* const xacc = (__m512i *) acc;
|
3688
4106
|
XXH_ASSERT((((size_t)acc) & 63) == 0);
|
3689
4107
|
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
|
3690
4108
|
|
@@ -3733,7 +4151,7 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
3733
4151
|
{
|
3734
4152
|
XXH_ASSERT((((size_t)acc) & 63) == 0);
|
3735
4153
|
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
|
3736
|
-
{
|
4154
|
+
{ __m512i* const xacc = (__m512i*) acc;
|
3737
4155
|
const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
|
3738
4156
|
|
3739
4157
|
/* xacc[0] ^= (xacc[0] >> 47) */
|
@@ -3794,7 +4212,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
|
3794
4212
|
const void* XXH_RESTRICT secret)
|
3795
4213
|
{
|
3796
4214
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
3797
|
-
{
|
4215
|
+
{ __m256i* const xacc = (__m256i *) acc;
|
3798
4216
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3799
4217
|
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
|
3800
4218
|
const __m256i* const xinput = (const __m256i *) input;
|
@@ -3826,7 +4244,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void
|
|
3826
4244
|
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
3827
4245
|
{
|
3828
4246
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
3829
|
-
{
|
4247
|
+
{ __m256i* const xacc = (__m256i*) acc;
|
3830
4248
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3831
4249
|
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
|
3832
4250
|
const __m256i* const xsecret = (const __m256i *) secret;
|
@@ -3900,7 +4318,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
|
|
3900
4318
|
{
|
3901
4319
|
/* SSE2 is just a half-scale version of the AVX2 version. */
|
3902
4320
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
3903
|
-
{
|
4321
|
+
{ __m128i* const xacc = (__m128i *) acc;
|
3904
4322
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3905
4323
|
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
|
3906
4324
|
const __m128i* const xinput = (const __m128i *) input;
|
@@ -3932,7 +4350,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void
|
|
3932
4350
|
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
3933
4351
|
{
|
3934
4352
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
3935
|
-
{
|
4353
|
+
{ __m128i* const xacc = (__m128i*) acc;
|
3936
4354
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
3937
4355
|
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
|
3938
4356
|
const __m128i* const xsecret = (const __m128i *) secret;
|
@@ -3994,40 +4412,66 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR
|
|
3994
4412
|
|
3995
4413
|
#if (XXH_VECTOR == XXH_NEON)
|
3996
4414
|
|
4415
|
+
/* forward declarations for the scalar routines */
|
4416
|
+
XXH_FORCE_INLINE void
|
4417
|
+
XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
|
4418
|
+
void const* XXH_RESTRICT secret, size_t lane);
|
4419
|
+
|
4420
|
+
XXH_FORCE_INLINE void
|
4421
|
+
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
|
4422
|
+
void const* XXH_RESTRICT secret, size_t lane);
|
4423
|
+
|
4424
|
+
/*!
|
4425
|
+
* @internal
|
4426
|
+
* @brief The bulk processing loop for NEON.
|
4427
|
+
*
|
4428
|
+
* The NEON code path is actually partially scalar when running on AArch64. This
|
4429
|
+
* is to optimize the pipelining and can have up to 15% speedup depending on the
|
4430
|
+
* CPU, and it also mitigates some GCC codegen issues.
|
4431
|
+
*
|
4432
|
+
* @see XXH3_NEON_LANES for configuring this and details about this optimization.
|
4433
|
+
*/
|
3997
4434
|
XXH_FORCE_INLINE void
|
3998
4435
|
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
|
3999
4436
|
const void* XXH_RESTRICT input,
|
4000
4437
|
const void* XXH_RESTRICT secret)
|
4001
4438
|
{
|
4002
4439
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
4440
|
+
XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
|
4003
4441
|
{
|
4004
|
-
|
4442
|
+
uint64x2_t* const xacc = (uint64x2_t *) acc;
|
4005
4443
|
/* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
|
4006
4444
|
uint8_t const* const xinput = (const uint8_t *) input;
|
4007
4445
|
uint8_t const* const xsecret = (const uint8_t *) secret;
|
4008
4446
|
|
4009
4447
|
size_t i;
|
4010
|
-
|
4448
|
+
/* AArch64 uses both scalar and neon at the same time */
|
4449
|
+
for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
|
4450
|
+
XXH3_scalarRound(acc, input, secret, i);
|
4451
|
+
}
|
4452
|
+
for (i=0; i < XXH3_NEON_LANES / 2; i++) {
|
4453
|
+
uint64x2_t acc_vec = xacc[i];
|
4011
4454
|
/* data_vec = xinput[i]; */
|
4012
|
-
|
4455
|
+
uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
|
4013
4456
|
/* key_vec = xsecret[i]; */
|
4014
|
-
|
4457
|
+
uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
|
4015
4458
|
uint64x2_t data_key;
|
4016
4459
|
uint32x2_t data_key_lo, data_key_hi;
|
4017
|
-
/*
|
4018
|
-
uint64x2_t
|
4019
|
-
uint64x2_t const swapped = vextq_u64(data64, data64, 1);
|
4020
|
-
xacc[i] = vaddq_u64 (xacc[i], swapped);
|
4460
|
+
/* acc_vec_2 = swap(data_vec) */
|
4461
|
+
uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1);
|
4021
4462
|
/* data_key = data_vec ^ key_vec; */
|
4022
|
-
data_key =
|
4463
|
+
data_key = veorq_u64(data_vec, key_vec);
|
4023
4464
|
/* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
|
4024
4465
|
* data_key_hi = (uint32x2_t) (data_key >> 32);
|
4025
4466
|
* data_key = UNDEFINED; */
|
4026
4467
|
XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
|
4027
|
-
/*
|
4028
|
-
|
4029
|
-
|
4468
|
+
/* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
|
4469
|
+
acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi);
|
4470
|
+
/* xacc[i] += acc_vec_2; */
|
4471
|
+
acc_vec = vaddq_u64 (acc_vec, acc_vec_2);
|
4472
|
+
xacc[i] = acc_vec;
|
4030
4473
|
}
|
4474
|
+
|
4031
4475
|
}
|
4032
4476
|
}
|
4033
4477
|
|
@@ -4041,15 +4485,19 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
4041
4485
|
uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
|
4042
4486
|
|
4043
4487
|
size_t i;
|
4044
|
-
|
4488
|
+
/* AArch64 uses both scalar and neon at the same time */
|
4489
|
+
for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
|
4490
|
+
XXH3_scalarScrambleRound(acc, secret, i);
|
4491
|
+
}
|
4492
|
+
for (i=0; i < XXH3_NEON_LANES / 2; i++) {
|
4045
4493
|
/* xacc[i] ^= (xacc[i] >> 47); */
|
4046
4494
|
uint64x2_t acc_vec = xacc[i];
|
4047
|
-
uint64x2_t shifted = vshrq_n_u64
|
4048
|
-
uint64x2_t data_vec = veorq_u64
|
4495
|
+
uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
|
4496
|
+
uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
|
4049
4497
|
|
4050
4498
|
/* xacc[i] ^= xsecret[i]; */
|
4051
|
-
|
4052
|
-
uint64x2_t data_key = veorq_u64(data_vec,
|
4499
|
+
uint64x2_t key_vec = XXH_vld1q_u64 (xsecret + (i * 16));
|
4500
|
+
uint64x2_t data_key = veorq_u64 (data_vec, key_vec);
|
4053
4501
|
|
4054
4502
|
/* xacc[i] *= XXH_PRIME32_1 */
|
4055
4503
|
uint32x2_t data_key_lo, data_key_hi;
|
@@ -4077,11 +4525,12 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
4077
4525
|
*/
|
4078
4526
|
uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
|
4079
4527
|
/* xacc[i] = prod_hi << 32; */
|
4080
|
-
|
4528
|
+
prod_hi = vshlq_n_u64(prod_hi, 32);
|
4081
4529
|
/* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
|
4082
|
-
xacc[i] = vmlal_u32(
|
4530
|
+
xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
|
4083
4531
|
}
|
4084
|
-
|
4532
|
+
}
|
4533
|
+
}
|
4085
4534
|
}
|
4086
4535
|
|
4087
4536
|
#endif
|
@@ -4093,7 +4542,8 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
|
|
4093
4542
|
const void* XXH_RESTRICT input,
|
4094
4543
|
const void* XXH_RESTRICT secret)
|
4095
4544
|
{
|
4096
|
-
|
4545
|
+
/* presumed aligned */
|
4546
|
+
unsigned int* const xacc = (unsigned int*) acc;
|
4097
4547
|
xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */
|
4098
4548
|
xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */
|
4099
4549
|
xxh_u64x2 const v32 = { 32, 32 };
|
@@ -4108,14 +4558,18 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
|
|
4108
4558
|
xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
|
4109
4559
|
/* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
|
4110
4560
|
xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
|
4111
|
-
xacc[i]
|
4561
|
+
/* acc_vec = xacc[i]; */
|
4562
|
+
xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
|
4563
|
+
acc_vec += product;
|
4112
4564
|
|
4113
4565
|
/* swap high and low halves */
|
4114
4566
|
#ifdef __s390x__
|
4115
|
-
|
4567
|
+
acc_vec += vec_permi(data_vec, data_vec, 2);
|
4116
4568
|
#else
|
4117
|
-
|
4569
|
+
acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
|
4118
4570
|
#endif
|
4571
|
+
/* xacc[i] = acc_vec; */
|
4572
|
+
vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
|
4119
4573
|
}
|
4120
4574
|
}
|
4121
4575
|
|
@@ -4153,38 +4607,90 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
4153
4607
|
|
4154
4608
|
/* scalar variants - universal */
|
4155
4609
|
|
4610
|
+
/*!
|
4611
|
+
* @internal
|
4612
|
+
* @brief Scalar round for @ref XXH3_accumulate_512_scalar().
|
4613
|
+
*
|
4614
|
+
* This is extracted to its own function because the NEON path uses a combination
|
4615
|
+
* of NEON and scalar.
|
4616
|
+
*/
|
4617
|
+
XXH_FORCE_INLINE void
|
4618
|
+
XXH3_scalarRound(void* XXH_RESTRICT acc,
|
4619
|
+
void const* XXH_RESTRICT input,
|
4620
|
+
void const* XXH_RESTRICT secret,
|
4621
|
+
size_t lane)
|
4622
|
+
{
|
4623
|
+
xxh_u64* xacc = (xxh_u64*) acc;
|
4624
|
+
xxh_u8 const* xinput = (xxh_u8 const*) input;
|
4625
|
+
xxh_u8 const* xsecret = (xxh_u8 const*) secret;
|
4626
|
+
XXH_ASSERT(lane < XXH_ACC_NB);
|
4627
|
+
XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
|
4628
|
+
{
|
4629
|
+
xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
|
4630
|
+
xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
|
4631
|
+
xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
|
4632
|
+
xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
|
4633
|
+
}
|
4634
|
+
}
|
4635
|
+
|
4636
|
+
/*!
|
4637
|
+
* @internal
|
4638
|
+
* @brief Processes a 64 byte block of data using the scalar path.
|
4639
|
+
*/
|
4156
4640
|
XXH_FORCE_INLINE void
|
4157
4641
|
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
|
4158
4642
|
const void* XXH_RESTRICT input,
|
4159
4643
|
const void* XXH_RESTRICT secret)
|
4160
4644
|
{
|
4161
|
-
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
|
4162
|
-
const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */
|
4163
|
-
const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
|
4164
4645
|
size_t i;
|
4165
|
-
|
4646
|
+
/* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
|
4647
|
+
#if defined(__GNUC__) && !defined(__clang__) \
|
4648
|
+
&& (defined(__arm__) || defined(__thumb2__)) \
|
4649
|
+
&& defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
|
4650
|
+
&& XXH_SIZE_OPT <= 0
|
4651
|
+
# pragma GCC unroll 8
|
4652
|
+
#endif
|
4166
4653
|
for (i=0; i < XXH_ACC_NB; i++) {
|
4167
|
-
|
4168
|
-
xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
|
4169
|
-
xacc[i ^ 1] += data_val; /* swap adjacent lanes */
|
4170
|
-
xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
|
4654
|
+
XXH3_scalarRound(acc, input, secret, i);
|
4171
4655
|
}
|
4172
4656
|
}
|
4173
4657
|
|
4658
|
+
/*!
|
4659
|
+
* @internal
|
4660
|
+
* @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
|
4661
|
+
*
|
4662
|
+
* This is extracted to its own function because the NEON path uses a combination
|
4663
|
+
* of NEON and scalar.
|
4664
|
+
*/
|
4174
4665
|
XXH_FORCE_INLINE void
|
4175
|
-
|
4666
|
+
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
|
4667
|
+
void const* XXH_RESTRICT secret,
|
4668
|
+
size_t lane)
|
4176
4669
|
{
|
4177
|
-
|
4670
|
+
xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
|
4178
4671
|
const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
|
4179
|
-
size_t i;
|
4180
4672
|
XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
|
4181
|
-
|
4182
|
-
|
4183
|
-
xxh_u64
|
4673
|
+
XXH_ASSERT(lane < XXH_ACC_NB);
|
4674
|
+
{
|
4675
|
+
xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
|
4676
|
+
xxh_u64 acc64 = xacc[lane];
|
4184
4677
|
acc64 = XXH_xorshift64(acc64, 47);
|
4185
4678
|
acc64 ^= key64;
|
4186
4679
|
acc64 *= XXH_PRIME32_1;
|
4187
|
-
xacc[
|
4680
|
+
xacc[lane] = acc64;
|
4681
|
+
}
|
4682
|
+
}
|
4683
|
+
|
4684
|
+
/*!
|
4685
|
+
* @internal
|
4686
|
+
* @brief Scrambles the accumulators after a large chunk has been read
|
4687
|
+
*/
|
4688
|
+
XXH_FORCE_INLINE void
|
4689
|
+
XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
4690
|
+
{
|
4691
|
+
size_t i;
|
4692
|
+
for (i=0; i < XXH_ACC_NB; i++) {
|
4693
|
+
XXH3_scalarScrambleRound(acc, secret, i);
|
4188
4694
|
}
|
4189
4695
|
}
|
4190
4696
|
|
@@ -4206,8 +4712,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
|
|
4206
4712
|
* placed sequentially, in order, at the top of the unrolled loop.
|
4207
4713
|
*
|
4208
4714
|
* While MOVK is great for generating constants (2 cycles for a 64-bit
|
4209
|
-
* constant compared to 4 cycles for LDR),
|
4210
|
-
*
|
4715
|
+
* constant compared to 4 cycles for LDR), it fights for bandwidth with
|
4716
|
+
* the arithmetic instructions.
|
4717
|
+
*
|
4211
4718
|
* I L S
|
4212
4719
|
* MOVK
|
4213
4720
|
* MOVK
|
@@ -4224,6 +4731,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
|
|
4224
4731
|
* ADD LDR
|
4225
4732
|
* SUB STR
|
4226
4733
|
* STR
|
4734
|
+
*
|
4735
|
+
* See XXH3_NEON_LANES for details on the pipeline.
|
4736
|
+
*
|
4227
4737
|
* XXH3_64bits_withSeed, len == 256, Snapdragon 835
|
4228
4738
|
* without hack: 2654.4 MB/s
|
4229
4739
|
* with hack: 3202.9 MB/s
|
@@ -4296,7 +4806,10 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
|
|
4296
4806
|
|
4297
4807
|
#endif
|
4298
4808
|
|
4299
|
-
|
4809
|
+
#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
|
4810
|
+
# undef XXH3_initCustomSecret
|
4811
|
+
# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
|
4812
|
+
#endif
|
4300
4813
|
|
4301
4814
|
#ifndef XXH_PREFETCH_DIST
|
4302
4815
|
# ifdef __clang__
|
@@ -4422,9 +4935,11 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
|
|
4422
4935
|
}
|
4423
4936
|
|
4424
4937
|
/*
|
4425
|
-
* It's important for performance
|
4938
|
+
* It's important for performance to transmit secret's size (when it's static)
|
4939
|
+
* so that the compiler can properly optimize the vectorized loop.
|
4940
|
+
* This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
|
4426
4941
|
*/
|
4427
|
-
|
4942
|
+
XXH_FORCE_INLINE XXH64_hash_t
|
4428
4943
|
XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
|
4429
4944
|
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
|
4430
4945
|
{
|
@@ -4433,13 +4948,12 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
|
|
4433
4948
|
}
|
4434
4949
|
|
4435
4950
|
/*
|
4436
|
-
* It's
|
4437
|
-
*
|
4438
|
-
*
|
4439
|
-
*
|
4440
|
-
* and uses this opportunity to streamline the generated code for better performance.
|
4951
|
+
* It's preferable for performance that XXH3_hashLong is not inlined,
|
4952
|
+
* as it results in a smaller function for small data, easier on the instruction cache.
|
4953
|
+
* Note that inside this no_inline function, we do inline the internal loop,
|
4954
|
+
* and provide a statically defined secret size to allow optimization of vector loop.
|
4441
4955
|
*/
|
4442
|
-
XXH_NO_INLINE XXH64_hash_t
|
4956
|
+
XXH_NO_INLINE XXH_PUREF XXH64_hash_t
|
4443
4957
|
XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
|
4444
4958
|
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
|
4445
4959
|
{
|
@@ -4465,10 +4979,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
|
|
4465
4979
|
XXH3_f_scrambleAcc f_scramble,
|
4466
4980
|
XXH3_f_initCustomSecret f_initSec)
|
4467
4981
|
{
|
4982
|
+
#if XXH_SIZE_OPT <= 0
|
4468
4983
|
if (seed == 0)
|
4469
4984
|
return XXH3_hashLong_64b_internal(input, len,
|
4470
4985
|
XXH3_kSecret, sizeof(XXH3_kSecret),
|
4471
4986
|
f_acc512, f_scramble);
|
4987
|
+
#endif
|
4472
4988
|
{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
|
4473
4989
|
f_initSec(secret, seed);
|
4474
4990
|
return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
|
@@ -4517,29 +5033,37 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
|
|
4517
5033
|
|
4518
5034
|
/* === Public entry point === */
|
4519
5035
|
|
4520
|
-
/*! @ingroup
|
4521
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t
|
5036
|
+
/*! @ingroup XXH3_family */
|
5037
|
+
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t length)
|
4522
5038
|
{
|
4523
|
-
return XXH3_64bits_internal(input,
|
5039
|
+
return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
|
4524
5040
|
}
|
4525
5041
|
|
4526
|
-
/*! @ingroup
|
5042
|
+
/*! @ingroup XXH3_family */
|
4527
5043
|
XXH_PUBLIC_API XXH64_hash_t
|
4528
|
-
XXH3_64bits_withSecret(const void* input, size_t
|
5044
|
+
XXH3_64bits_withSecret(const void* input, size_t length, const void* secret, size_t secretSize)
|
4529
5045
|
{
|
4530
|
-
return XXH3_64bits_internal(input,
|
5046
|
+
return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
|
4531
5047
|
}
|
4532
5048
|
|
4533
|
-
/*! @ingroup
|
5049
|
+
/*! @ingroup XXH3_family */
|
4534
5050
|
XXH_PUBLIC_API XXH64_hash_t
|
4535
|
-
XXH3_64bits_withSeed(const void* input, size_t
|
5051
|
+
XXH3_64bits_withSeed(const void* input, size_t length, XXH64_hash_t seed)
|
4536
5052
|
{
|
4537
|
-
return XXH3_64bits_internal(input,
|
5053
|
+
return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
|
4538
5054
|
}
|
4539
5055
|
|
5056
|
+
XXH_PUBLIC_API XXH64_hash_t
|
5057
|
+
XXH3_64bits_withSecretandSeed(const void* input, size_t length, const void* secret, size_t secretSize, XXH64_hash_t seed)
|
5058
|
+
{
|
5059
|
+
if (length <= XXH3_MIDSIZE_MAX)
|
5060
|
+
return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
|
5061
|
+
return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
|
5062
|
+
}
|
4540
5063
|
|
4541
|
-
/* === XXH3 streaming === */
|
4542
5064
|
|
5065
|
+
/* === XXH3 streaming === */
|
5066
|
+
#ifndef XXH_NO_STREAM
|
4543
5067
|
/*
|
4544
5068
|
* Malloc's a pointer that is always aligned to align.
|
4545
5069
|
*
|
@@ -4563,7 +5087,7 @@ XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
|
|
4563
5087
|
*
|
4564
5088
|
* Align must be a power of 2 and 8 <= align <= 128.
|
4565
5089
|
*/
|
4566
|
-
static void* XXH_alignedMalloc(size_t s, size_t align)
|
5090
|
+
static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
|
4567
5091
|
{
|
4568
5092
|
XXH_ASSERT(align <= 128 && align >= 8); /* range check */
|
4569
5093
|
XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
|
@@ -4605,7 +5129,7 @@ static void XXH_alignedFree(void* p)
|
|
4605
5129
|
XXH_free(base);
|
4606
5130
|
}
|
4607
5131
|
}
|
4608
|
-
/*! @ingroup
|
5132
|
+
/*! @ingroup XXH3_family */
|
4609
5133
|
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
|
4610
5134
|
{
|
4611
5135
|
XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
|
@@ -4614,24 +5138,24 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
|
|
4614
5138
|
return state;
|
4615
5139
|
}
|
4616
5140
|
|
4617
|
-
/*! @ingroup
|
5141
|
+
/*! @ingroup XXH3_family */
|
4618
5142
|
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
|
4619
5143
|
{
|
4620
5144
|
XXH_alignedFree(statePtr);
|
4621
5145
|
return XXH_OK;
|
4622
5146
|
}
|
4623
5147
|
|
4624
|
-
/*! @ingroup
|
5148
|
+
/*! @ingroup XXH3_family */
|
4625
5149
|
XXH_PUBLIC_API void
|
4626
5150
|
XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
|
4627
5151
|
{
|
4628
|
-
|
5152
|
+
XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
|
4629
5153
|
}
|
4630
5154
|
|
4631
5155
|
static void
|
4632
5156
|
XXH3_reset_internal(XXH3_state_t* statePtr,
|
4633
|
-
|
4634
|
-
|
5157
|
+
XXH64_hash_t seed,
|
5158
|
+
const void* secret, size_t secretSize)
|
4635
5159
|
{
|
4636
5160
|
size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
|
4637
5161
|
size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
|
@@ -4648,13 +5172,14 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
|
|
4648
5172
|
statePtr->acc[6] = XXH_PRIME64_5;
|
4649
5173
|
statePtr->acc[7] = XXH_PRIME32_1;
|
4650
5174
|
statePtr->seed = seed;
|
5175
|
+
statePtr->useSeed = (seed != 0);
|
4651
5176
|
statePtr->extSecret = (const unsigned char*)secret;
|
4652
5177
|
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
|
4653
5178
|
statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
|
4654
5179
|
statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
|
4655
5180
|
}
|
4656
5181
|
|
4657
|
-
/*! @ingroup
|
5182
|
+
/*! @ingroup XXH3_family */
|
4658
5183
|
XXH_PUBLIC_API XXH_errorcode
|
4659
5184
|
XXH3_64bits_reset(XXH3_state_t* statePtr)
|
4660
5185
|
{
|
@@ -4663,7 +5188,7 @@ XXH3_64bits_reset(XXH3_state_t* statePtr)
|
|
4663
5188
|
return XXH_OK;
|
4664
5189
|
}
|
4665
5190
|
|
4666
|
-
/*! @ingroup
|
5191
|
+
/*! @ingroup XXH3_family */
|
4667
5192
|
XXH_PUBLIC_API XXH_errorcode
|
4668
5193
|
XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
|
4669
5194
|
{
|
@@ -4674,17 +5199,30 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t
|
|
4674
5199
|
return XXH_OK;
|
4675
5200
|
}
|
4676
5201
|
|
4677
|
-
/*! @ingroup
|
5202
|
+
/*! @ingroup XXH3_family */
|
4678
5203
|
XXH_PUBLIC_API XXH_errorcode
|
4679
5204
|
XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
|
4680
5205
|
{
|
4681
5206
|
if (statePtr == NULL) return XXH_ERROR;
|
4682
5207
|
if (seed==0) return XXH3_64bits_reset(statePtr);
|
4683
|
-
if (seed != statePtr->seed)
|
5208
|
+
if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
|
5209
|
+
XXH3_initCustomSecret(statePtr->customSecret, seed);
|
4684
5210
|
XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
|
4685
5211
|
return XXH_OK;
|
4686
5212
|
}
|
4687
5213
|
|
5214
|
+
/*! @ingroup XXH3_family */
|
5215
|
+
XXH_PUBLIC_API XXH_errorcode
|
5216
|
+
XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
|
5217
|
+
{
|
5218
|
+
if (statePtr == NULL) return XXH_ERROR;
|
5219
|
+
if (secret == NULL) return XXH_ERROR;
|
5220
|
+
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
|
5221
|
+
XXH3_reset_internal(statePtr, seed64, secret, secretSize);
|
5222
|
+
statePtr->useSeed = 1; /* always, even if seed64==0 */
|
5223
|
+
return XXH_OK;
|
5224
|
+
}
|
5225
|
+
|
4688
5226
|
/* Note : when XXH3_consumeStripes() is invoked,
|
4689
5227
|
* there must be a guarantee that at least one more byte must be consumed from input
|
4690
5228
|
* so that the function can blindly consume all stripes using the "normal" secret segment */
|
@@ -4712,35 +5250,48 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
|
|
4712
5250
|
}
|
4713
5251
|
}
|
4714
5252
|
|
5253
|
+
#ifndef XXH3_STREAM_USE_STACK
|
5254
|
+
# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
|
5255
|
+
# define XXH3_STREAM_USE_STACK 1
|
5256
|
+
# endif
|
5257
|
+
#endif
|
4715
5258
|
/*
|
4716
5259
|
* Both XXH3_64bits_update and XXH3_128bits_update use this routine.
|
4717
5260
|
*/
|
4718
5261
|
XXH_FORCE_INLINE XXH_errorcode
|
4719
|
-
XXH3_update(XXH3_state_t* state,
|
4720
|
-
const xxh_u8* input, size_t len,
|
5262
|
+
XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
|
5263
|
+
const xxh_u8* XXH_RESTRICT input, size_t len,
|
4721
5264
|
XXH3_f_accumulate_512 f_acc512,
|
4722
5265
|
XXH3_f_scrambleAcc f_scramble)
|
4723
5266
|
{
|
4724
|
-
if (input==NULL)
|
4725
|
-
|
5267
|
+
if (input==NULL) {
|
5268
|
+
XXH_ASSERT(len == 0);
|
4726
5269
|
return XXH_OK;
|
4727
|
-
|
4728
|
-
return XXH_ERROR;
|
4729
|
-
#endif
|
5270
|
+
}
|
4730
5271
|
|
5272
|
+
XXH_ASSERT(state != NULL);
|
4731
5273
|
{ const xxh_u8* const bEnd = input + len;
|
4732
5274
|
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
|
4733
|
-
|
5275
|
+
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
|
5276
|
+
/* For some reason, gcc and MSVC seem to suffer greatly
|
5277
|
+
* when operating accumulators directly into state.
|
5278
|
+
* Operating into stack space seems to enable proper optimization.
|
5279
|
+
* clang, on the other hand, doesn't seem to need this trick */
|
5280
|
+
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
|
5281
|
+
#else
|
5282
|
+
xxh_u64* XXH_RESTRICT const acc = state->acc;
|
5283
|
+
#endif
|
4734
5284
|
state->totalLen += len;
|
4735
5285
|
XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
|
4736
5286
|
|
4737
|
-
|
5287
|
+
/* small input : just fill in tmp buffer */
|
5288
|
+
if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
|
4738
5289
|
XXH_memcpy(state->buffer + state->bufferedSize, input, len);
|
4739
5290
|
state->bufferedSize += (XXH32_hash_t)len;
|
4740
5291
|
return XXH_OK;
|
4741
5292
|
}
|
4742
|
-
/* total input is now > XXH3_INTERNALBUFFER_SIZE */
|
4743
5293
|
|
5294
|
+
/* total input is now > XXH3_INTERNALBUFFER_SIZE */
|
4744
5295
|
#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
|
4745
5296
|
XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
|
4746
5297
|
|
@@ -4752,7 +5303,7 @@ XXH3_update(XXH3_state_t* state,
|
|
4752
5303
|
size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
|
4753
5304
|
XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
|
4754
5305
|
input += loadSize;
|
4755
|
-
XXH3_consumeStripes(
|
5306
|
+
XXH3_consumeStripes(acc,
|
4756
5307
|
&state->nbStripesSoFar, state->nbStripesPerBlock,
|
4757
5308
|
state->buffer, XXH3_INTERNALBUFFER_STRIPES,
|
4758
5309
|
secret, state->secretLimit,
|
@@ -4761,31 +5312,68 @@ XXH3_update(XXH3_state_t* state,
|
|
4761
5312
|
}
|
4762
5313
|
XXH_ASSERT(input < bEnd);
|
4763
5314
|
|
4764
|
-
/*
|
4765
|
-
if (bEnd - input >
|
4766
|
-
|
4767
|
-
|
4768
|
-
|
4769
|
-
|
4770
|
-
|
4771
|
-
|
4772
|
-
|
4773
|
-
|
4774
|
-
|
4775
|
-
|
4776
|
-
|
5315
|
+
/* large input to consume : ingest per full block */
|
5316
|
+
if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
|
5317
|
+
size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
|
5318
|
+
XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
|
5319
|
+
/* join to current block's end */
|
5320
|
+
{ size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
|
5321
|
+
XXH_ASSERT(nbStripesToEnd <= nbStripes);
|
5322
|
+
XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
|
5323
|
+
f_scramble(acc, secret + state->secretLimit);
|
5324
|
+
state->nbStripesSoFar = 0;
|
5325
|
+
input += nbStripesToEnd * XXH_STRIPE_LEN;
|
5326
|
+
nbStripes -= nbStripesToEnd;
|
5327
|
+
}
|
5328
|
+
/* consume per entire blocks */
|
5329
|
+
while(nbStripes >= state->nbStripesPerBlock) {
|
5330
|
+
XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
|
5331
|
+
f_scramble(acc, secret + state->secretLimit);
|
5332
|
+
input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
|
5333
|
+
nbStripes -= state->nbStripesPerBlock;
|
5334
|
+
}
|
5335
|
+
/* consume last partial block */
|
5336
|
+
XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
|
5337
|
+
input += nbStripes * XXH_STRIPE_LEN;
|
5338
|
+
XXH_ASSERT(input < bEnd); /* at least some bytes left */
|
5339
|
+
state->nbStripesSoFar = nbStripes;
|
5340
|
+
/* buffer predecessor of last partial stripe */
|
5341
|
+
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
|
5342
|
+
XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
|
5343
|
+
} else {
|
5344
|
+
/* content to consume <= block size */
|
5345
|
+
/* Consume input by a multiple of internal buffer size */
|
5346
|
+
if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
|
5347
|
+
const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
|
5348
|
+
do {
|
5349
|
+
XXH3_consumeStripes(acc,
|
5350
|
+
&state->nbStripesSoFar, state->nbStripesPerBlock,
|
5351
|
+
input, XXH3_INTERNALBUFFER_STRIPES,
|
5352
|
+
secret, state->secretLimit,
|
5353
|
+
f_acc512, f_scramble);
|
5354
|
+
input += XXH3_INTERNALBUFFER_SIZE;
|
5355
|
+
} while (input<limit);
|
5356
|
+
/* buffer predecessor of last partial stripe */
|
5357
|
+
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
|
5358
|
+
}
|
4777
5359
|
}
|
4778
|
-
XXH_ASSERT(input < bEnd);
|
4779
5360
|
|
4780
5361
|
/* Some remaining input (always) : buffer it */
|
5362
|
+
XXH_ASSERT(input < bEnd);
|
5363
|
+
XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
|
5364
|
+
XXH_ASSERT(state->bufferedSize == 0);
|
4781
5365
|
XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
|
4782
5366
|
state->bufferedSize = (XXH32_hash_t)(bEnd-input);
|
5367
|
+
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
|
5368
|
+
/* save stack accumulators into state */
|
5369
|
+
memcpy(state->acc, acc, sizeof(acc));
|
5370
|
+
#endif
|
4783
5371
|
}
|
4784
5372
|
|
4785
5373
|
return XXH_OK;
|
4786
5374
|
}
|
4787
5375
|
|
4788
|
-
/*! @ingroup
|
5376
|
+
/*! @ingroup XXH3_family */
|
4789
5377
|
XXH_PUBLIC_API XXH_errorcode
|
4790
5378
|
XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
|
4791
5379
|
{
|
@@ -4803,7 +5391,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
|
|
4803
5391
|
* Digest on a local copy. This way, the state remains unaltered, and it can
|
4804
5392
|
* continue ingesting more input afterwards.
|
4805
5393
|
*/
|
4806
|
-
|
5394
|
+
XXH_memcpy(acc, state->acc, sizeof(state->acc));
|
4807
5395
|
if (state->bufferedSize >= XXH_STRIPE_LEN) {
|
4808
5396
|
size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
|
4809
5397
|
size_t nbStripesSoFar = state->nbStripesSoFar;
|
@@ -4820,15 +5408,15 @@ XXH3_digest_long (XXH64_hash_t* acc,
|
|
4820
5408
|
xxh_u8 lastStripe[XXH_STRIPE_LEN];
|
4821
5409
|
size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
|
4822
5410
|
XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
|
4823
|
-
|
4824
|
-
|
5411
|
+
XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
|
5412
|
+
XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
|
4825
5413
|
XXH3_accumulate_512(acc,
|
4826
5414
|
lastStripe,
|
4827
5415
|
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
|
4828
5416
|
}
|
4829
5417
|
}
|
4830
5418
|
|
4831
|
-
/*! @ingroup
|
5419
|
+
/*! @ingroup XXH3_family */
|
4832
5420
|
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
|
4833
5421
|
{
|
4834
5422
|
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
|
@@ -4840,57 +5428,12 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
|
|
4840
5428
|
(xxh_u64)state->totalLen * XXH_PRIME64_1);
|
4841
5429
|
}
|
4842
5430
|
/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
|
4843
|
-
if (state->
|
5431
|
+
if (state->useSeed)
|
4844
5432
|
return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
|
4845
5433
|
return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
|
4846
5434
|
secret, state->secretLimit + XXH_STRIPE_LEN);
|
4847
5435
|
}
|
4848
|
-
|
4849
|
-
|
4850
|
-
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
|
4851
|
-
|
4852
|
-
/*! @ingroup xxh3_family */
|
4853
|
-
XXH_PUBLIC_API void
|
4854
|
-
XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
|
4855
|
-
{
|
4856
|
-
XXH_ASSERT(secretBuffer != NULL);
|
4857
|
-
if (customSeedSize == 0) {
|
4858
|
-
memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
|
4859
|
-
return;
|
4860
|
-
}
|
4861
|
-
XXH_ASSERT(customSeed != NULL);
|
4862
|
-
|
4863
|
-
{ size_t const segmentSize = sizeof(XXH128_hash_t);
|
4864
|
-
size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
|
4865
|
-
XXH128_canonical_t scrambler;
|
4866
|
-
XXH64_hash_t seeds[12];
|
4867
|
-
size_t segnb;
|
4868
|
-
XXH_ASSERT(nbSegments == 12);
|
4869
|
-
XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
|
4870
|
-
XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
|
4871
|
-
|
4872
|
-
/*
|
4873
|
-
* Copy customSeed to seeds[], truncating or repeating as necessary.
|
4874
|
-
*/
|
4875
|
-
{ size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
|
4876
|
-
size_t filled = toFill;
|
4877
|
-
memcpy(seeds, customSeed, toFill);
|
4878
|
-
while (filled < sizeof(seeds)) {
|
4879
|
-
toFill = XXH_MIN(filled, sizeof(seeds) - filled);
|
4880
|
-
memcpy((char*)seeds + filled, seeds, toFill);
|
4881
|
-
filled += toFill;
|
4882
|
-
} }
|
4883
|
-
|
4884
|
-
/* generate secret */
|
4885
|
-
memcpy(secretBuffer, &scrambler, sizeof(scrambler));
|
4886
|
-
for (segnb=1; segnb < nbSegments; segnb++) {
|
4887
|
-
size_t const segmentStart = segnb * segmentSize;
|
4888
|
-
XXH128_canonical_t segment;
|
4889
|
-
XXH128_canonicalFromHash(&segment,
|
4890
|
-
XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) );
|
4891
|
-
memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
|
4892
|
-
} }
|
4893
|
-
}
|
5436
|
+
#endif /* !XXH_NO_STREAM */
|
4894
5437
|
|
4895
5438
|
|
4896
5439
|
/* ==========================================
|
@@ -4910,7 +5453,7 @@ XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSee
|
|
4910
5453
|
* fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
|
4911
5454
|
*/
|
4912
5455
|
|
4913
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
5456
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
4914
5457
|
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
4915
5458
|
{
|
4916
5459
|
/* A doubled version of 1to3_64b with different constants. */
|
@@ -4939,7 +5482,7 @@ XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
|
|
4939
5482
|
}
|
4940
5483
|
}
|
4941
5484
|
|
4942
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
5485
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
4943
5486
|
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
4944
5487
|
{
|
4945
5488
|
XXH_ASSERT(input != NULL);
|
@@ -4966,7 +5509,7 @@ XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
|
|
4966
5509
|
}
|
4967
5510
|
}
|
4968
5511
|
|
4969
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
5512
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
4970
5513
|
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
4971
5514
|
{
|
4972
5515
|
XXH_ASSERT(input != NULL);
|
@@ -5041,7 +5584,7 @@ XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
|
|
5041
5584
|
/*
|
5042
5585
|
* Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
|
5043
5586
|
*/
|
5044
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
5587
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
5045
5588
|
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
5046
5589
|
{
|
5047
5590
|
XXH_ASSERT(len <= 16);
|
@@ -5072,7 +5615,7 @@ XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
|
|
5072
5615
|
}
|
5073
5616
|
|
5074
5617
|
|
5075
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
5618
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
5076
5619
|
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
5077
5620
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
5078
5621
|
XXH64_hash_t seed)
|
@@ -5083,6 +5626,16 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
5083
5626
|
{ XXH128_hash_t acc;
|
5084
5627
|
acc.low64 = len * XXH_PRIME64_1;
|
5085
5628
|
acc.high64 = 0;
|
5629
|
+
|
5630
|
+
#if XXH_SIZE_OPT >= 1
|
5631
|
+
{
|
5632
|
+
/* Smaller, but slightly slower. */
|
5633
|
+
size_t i = (len - 1) / 32;
|
5634
|
+
do {
|
5635
|
+
acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
|
5636
|
+
} while (i-- != 0);
|
5637
|
+
}
|
5638
|
+
#else
|
5086
5639
|
if (len > 32) {
|
5087
5640
|
if (len > 64) {
|
5088
5641
|
if (len > 96) {
|
@@ -5093,6 +5646,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
5093
5646
|
acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
|
5094
5647
|
}
|
5095
5648
|
acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
|
5649
|
+
#endif
|
5096
5650
|
{ XXH128_hash_t h128;
|
5097
5651
|
h128.low64 = acc.low64 + acc.high64;
|
5098
5652
|
h128.high64 = (acc.low64 * XXH_PRIME64_1)
|
@@ -5105,7 +5659,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
5105
5659
|
}
|
5106
5660
|
}
|
5107
5661
|
|
5108
|
-
XXH_NO_INLINE XXH128_hash_t
|
5662
|
+
XXH_NO_INLINE XXH_PUREF XXH128_hash_t
|
5109
5663
|
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
5110
5664
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
5111
5665
|
XXH64_hash_t seed)
|
@@ -5180,9 +5734,9 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
|
|
5180
5734
|
}
|
5181
5735
|
|
5182
5736
|
/*
|
5183
|
-
* It's important for performance that XXH3_hashLong is not inlined.
|
5737
|
+
* It's important for performance that XXH3_hashLong() is not inlined.
|
5184
5738
|
*/
|
5185
|
-
XXH_NO_INLINE XXH128_hash_t
|
5739
|
+
XXH_NO_INLINE XXH_PUREF XXH128_hash_t
|
5186
5740
|
XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
|
5187
5741
|
XXH64_hash_t seed64,
|
5188
5742
|
const void* XXH_RESTRICT secret, size_t secretLen)
|
@@ -5193,9 +5747,10 @@ XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
|
|
5193
5747
|
}
|
5194
5748
|
|
5195
5749
|
/*
|
5196
|
-
* It's important for performance
|
5750
|
+
* It's important for performance to pass @p secretLen (when it's static)
|
5751
|
+
* to the compiler, so that it can properly optimize the vectorized loop.
|
5197
5752
|
*/
|
5198
|
-
|
5753
|
+
XXH_FORCE_INLINE XXH128_hash_t
|
5199
5754
|
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
|
5200
5755
|
XXH64_hash_t seed64,
|
5201
5756
|
const void* XXH_RESTRICT secret, size_t secretLen)
|
@@ -5262,7 +5817,7 @@ XXH3_128bits_internal(const void* input, size_t len,
|
|
5262
5817
|
|
5263
5818
|
/* === Public XXH128 API === */
|
5264
5819
|
|
5265
|
-
/*! @ingroup
|
5820
|
+
/*! @ingroup XXH3_family */
|
5266
5821
|
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
|
5267
5822
|
{
|
5268
5823
|
return XXH3_128bits_internal(input, len, 0,
|
@@ -5270,7 +5825,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
|
|
5270
5825
|
XXH3_hashLong_128b_default);
|
5271
5826
|
}
|
5272
5827
|
|
5273
|
-
/*! @ingroup
|
5828
|
+
/*! @ingroup XXH3_family */
|
5274
5829
|
XXH_PUBLIC_API XXH128_hash_t
|
5275
5830
|
XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
|
5276
5831
|
{
|
@@ -5279,7 +5834,7 @@ XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_
|
|
5279
5834
|
XXH3_hashLong_128b_withSecret);
|
5280
5835
|
}
|
5281
5836
|
|
5282
|
-
/*! @ingroup
|
5837
|
+
/*! @ingroup XXH3_family */
|
5283
5838
|
XXH_PUBLIC_API XXH128_hash_t
|
5284
5839
|
XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
|
5285
5840
|
{
|
@@ -5288,7 +5843,16 @@ XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
|
|
5288
5843
|
XXH3_hashLong_128b_withSeed);
|
5289
5844
|
}
|
5290
5845
|
|
5291
|
-
/*! @ingroup
|
5846
|
+
/*! @ingroup XXH3_family */
|
5847
|
+
XXH_PUBLIC_API XXH128_hash_t
|
5848
|
+
XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
|
5849
|
+
{
|
5850
|
+
if (len <= XXH3_MIDSIZE_MAX)
|
5851
|
+
return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
|
5852
|
+
return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
|
5853
|
+
}
|
5854
|
+
|
5855
|
+
/*! @ingroup XXH3_family */
|
5292
5856
|
XXH_PUBLIC_API XXH128_hash_t
|
5293
5857
|
XXH128(const void* input, size_t len, XXH64_hash_t seed)
|
5294
5858
|
{
|
@@ -5297,44 +5861,41 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)
|
|
5297
5861
|
|
5298
5862
|
|
5299
5863
|
/* === XXH3 128-bit streaming === */
|
5300
|
-
|
5864
|
+
#ifndef XXH_NO_STREAM
|
5301
5865
|
/*
|
5302
|
-
* All
|
5866
|
+
* All initialization and update functions are identical to 64-bit streaming variant.
|
5303
5867
|
* The only difference is the finalization routine.
|
5304
5868
|
*/
|
5305
5869
|
|
5306
|
-
/*! @ingroup
|
5870
|
+
/*! @ingroup XXH3_family */
|
5307
5871
|
XXH_PUBLIC_API XXH_errorcode
|
5308
5872
|
XXH3_128bits_reset(XXH3_state_t* statePtr)
|
5309
5873
|
{
|
5310
|
-
|
5311
|
-
XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
|
5312
|
-
return XXH_OK;
|
5874
|
+
return XXH3_64bits_reset(statePtr);
|
5313
5875
|
}
|
5314
5876
|
|
5315
|
-
/*! @ingroup
|
5877
|
+
/*! @ingroup XXH3_family */
|
5316
5878
|
XXH_PUBLIC_API XXH_errorcode
|
5317
5879
|
XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
|
5318
5880
|
{
|
5319
|
-
|
5320
|
-
XXH3_reset_internal(statePtr, 0, secret, secretSize);
|
5321
|
-
if (secret == NULL) return XXH_ERROR;
|
5322
|
-
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
|
5323
|
-
return XXH_OK;
|
5881
|
+
return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
|
5324
5882
|
}
|
5325
5883
|
|
5326
|
-
/*! @ingroup
|
5884
|
+
/*! @ingroup XXH3_family */
|
5327
5885
|
XXH_PUBLIC_API XXH_errorcode
|
5328
5886
|
XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
|
5329
5887
|
{
|
5330
|
-
|
5331
|
-
|
5332
|
-
|
5333
|
-
|
5334
|
-
|
5888
|
+
return XXH3_64bits_reset_withSeed(statePtr, seed);
|
5889
|
+
}
|
5890
|
+
|
5891
|
+
/*! @ingroup XXH3_family */
|
5892
|
+
XXH_PUBLIC_API XXH_errorcode
|
5893
|
+
XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
|
5894
|
+
{
|
5895
|
+
return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
|
5335
5896
|
}
|
5336
5897
|
|
5337
|
-
/*! @ingroup
|
5898
|
+
/*! @ingroup XXH3_family */
|
5338
5899
|
XXH_PUBLIC_API XXH_errorcode
|
5339
5900
|
XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
|
5340
5901
|
{
|
@@ -5342,7 +5903,7 @@ XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
|
|
5342
5903
|
XXH3_accumulate_512, XXH3_scrambleAcc);
|
5343
5904
|
}
|
5344
5905
|
|
5345
|
-
/*! @ingroup
|
5906
|
+
/*! @ingroup XXH3_family */
|
5346
5907
|
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
|
5347
5908
|
{
|
5348
5909
|
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
|
@@ -5367,13 +5928,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
|
|
5367
5928
|
return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
|
5368
5929
|
secret, state->secretLimit + XXH_STRIPE_LEN);
|
5369
5930
|
}
|
5370
|
-
|
5931
|
+
#endif /* !XXH_NO_STREAM */
|
5371
5932
|
/* 128-bit utility functions */
|
5372
5933
|
|
5373
5934
|
#include <string.h> /* memcmp, memcpy */
|
5374
5935
|
|
5375
5936
|
/* return : 1 is equal, 0 if different */
|
5376
|
-
/*! @ingroup
|
5937
|
+
/*! @ingroup XXH3_family */
|
5377
5938
|
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
|
5378
5939
|
{
|
5379
5940
|
/* note : XXH128_hash_t is compact, it has no padding byte */
|
@@ -5381,10 +5942,10 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
|
|
5381
5942
|
}
|
5382
5943
|
|
5383
5944
|
/* This prototype is compatible with stdlib's qsort().
|
5384
|
-
* return : >0 if *h128_1 > *h128_2
|
5385
|
-
*
|
5386
|
-
*
|
5387
|
-
/*! @ingroup
|
5945
|
+
* @return : >0 if *h128_1 > *h128_2
|
5946
|
+
* <0 if *h128_1 < *h128_2
|
5947
|
+
* =0 if *h128_1 == *h128_2 */
|
5948
|
+
/*! @ingroup XXH3_family */
|
5388
5949
|
XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
|
5389
5950
|
{
|
5390
5951
|
XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
|
@@ -5397,7 +5958,7 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
|
|
5397
5958
|
|
5398
5959
|
|
5399
5960
|
/*====== Canonical representation ======*/
|
5400
|
-
/*! @ingroup
|
5961
|
+
/*! @ingroup XXH3_family */
|
5401
5962
|
XXH_PUBLIC_API void
|
5402
5963
|
XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
|
5403
5964
|
{
|
@@ -5406,11 +5967,11 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
|
|
5406
5967
|
hash.high64 = XXH_swap64(hash.high64);
|
5407
5968
|
hash.low64 = XXH_swap64(hash.low64);
|
5408
5969
|
}
|
5409
|
-
|
5410
|
-
|
5970
|
+
XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
|
5971
|
+
XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
|
5411
5972
|
}
|
5412
5973
|
|
5413
|
-
/*! @ingroup
|
5974
|
+
/*! @ingroup XXH3_family */
|
5414
5975
|
XXH_PUBLIC_API XXH128_hash_t
|
5415
5976
|
XXH128_hashFromCanonical(const XXH128_canonical_t* src)
|
5416
5977
|
{
|
@@ -5420,10 +5981,81 @@ XXH128_hashFromCanonical(const XXH128_canonical_t* src)
|
|
5420
5981
|
return h;
|
5421
5982
|
}
|
5422
5983
|
|
5984
|
+
|
5985
|
+
|
5986
|
+
/* ==========================================
|
5987
|
+
* Secret generators
|
5988
|
+
* ==========================================
|
5989
|
+
*/
|
5990
|
+
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
|
5991
|
+
|
5992
|
+
XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
|
5993
|
+
{
|
5994
|
+
XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
|
5995
|
+
XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
|
5996
|
+
}
|
5997
|
+
|
5998
|
+
/*! @ingroup XXH3_family */
|
5999
|
+
XXH_PUBLIC_API XXH_errorcode
|
6000
|
+
XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
|
6001
|
+
{
|
6002
|
+
#if (XXH_DEBUGLEVEL >= 1)
|
6003
|
+
XXH_ASSERT(secretBuffer != NULL);
|
6004
|
+
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
|
6005
|
+
#else
|
6006
|
+
/* production mode, assert() are disabled */
|
6007
|
+
if (secretBuffer == NULL) return XXH_ERROR;
|
6008
|
+
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
|
6009
|
+
#endif
|
6010
|
+
|
6011
|
+
if (customSeedSize == 0) {
|
6012
|
+
customSeed = XXH3_kSecret;
|
6013
|
+
customSeedSize = XXH_SECRET_DEFAULT_SIZE;
|
6014
|
+
}
|
6015
|
+
#if (XXH_DEBUGLEVEL >= 1)
|
6016
|
+
XXH_ASSERT(customSeed != NULL);
|
6017
|
+
#else
|
6018
|
+
if (customSeed == NULL) return XXH_ERROR;
|
6019
|
+
#endif
|
6020
|
+
|
6021
|
+
/* Fill secretBuffer with a copy of customSeed - repeat as needed */
|
6022
|
+
{ size_t pos = 0;
|
6023
|
+
while (pos < secretSize) {
|
6024
|
+
size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
|
6025
|
+
memcpy((char*)secretBuffer + pos, customSeed, toCopy);
|
6026
|
+
pos += toCopy;
|
6027
|
+
} }
|
6028
|
+
|
6029
|
+
{ size_t const nbSeg16 = secretSize / 16;
|
6030
|
+
size_t n;
|
6031
|
+
XXH128_canonical_t scrambler;
|
6032
|
+
XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
|
6033
|
+
for (n=0; n<nbSeg16; n++) {
|
6034
|
+
XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
|
6035
|
+
XXH3_combine16((char*)secretBuffer + n*16, h128);
|
6036
|
+
}
|
6037
|
+
/* last segment */
|
6038
|
+
XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
|
6039
|
+
}
|
6040
|
+
return XXH_OK;
|
6041
|
+
}
|
6042
|
+
|
6043
|
+
/*! @ingroup XXH3_family */
|
6044
|
+
XXH_PUBLIC_API void
|
6045
|
+
XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
|
6046
|
+
{
|
6047
|
+
XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
|
6048
|
+
XXH3_initCustomSecret(secret, seed);
|
6049
|
+
XXH_ASSERT(secretBuffer != NULL);
|
6050
|
+
memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
|
6051
|
+
}
|
6052
|
+
|
6053
|
+
|
6054
|
+
|
5423
6055
|
/* Pop our optimization override from above */
|
5424
6056
|
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
|
5425
6057
|
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
|
5426
|
-
&& defined(__OPTIMIZE__) &&
|
6058
|
+
&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
|
5427
6059
|
# pragma GCC pop_options
|
5428
6060
|
#endif
|
5429
6061
|
|