digest-xxhash 0.2.1 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * xxHash - Extremely Fast Hash algorithm
3
3
  * Header File
4
- * Copyright (C) 2012-2020 Yann Collet
4
+ * Copyright (C) 2012-2021 Yann Collet
5
5
  *
6
6
  * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
7
7
  *
@@ -32,49 +32,142 @@
32
32
  * - xxHash homepage: https://www.xxhash.com
33
33
  * - xxHash source repository: https://github.com/Cyan4973/xxHash
34
34
  */
35
+
35
36
  /*!
36
37
  * @mainpage xxHash
37
38
  *
39
+ * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
40
+ * limits.
41
+ *
42
+ * It is proposed in four flavors, in three families:
43
+ * 1. @ref XXH32_family
44
+ * - Classic 32-bit hash function. Simple, compact, and runs on almost all
45
+ * 32-bit and 64-bit systems.
46
+ * 2. @ref XXH64_family
47
+ * - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
48
+ * 64-bit systems (but _not_ 32-bit systems).
49
+ * 3. @ref XXH3_family
50
+ * - Modern 64-bit and 128-bit hash function family which features improved
51
+ * strength and performance across the board, especially on smaller data.
52
+ * It benefits greatly from SIMD and 64-bit without requiring it.
53
+ *
54
+ * Benchmarks
55
+ * ---
56
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
57
+ * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
58
+ *
59
+ * | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
60
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
61
+ * | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
62
+ * | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
63
+ * | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
64
+ * | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
65
+ * | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
66
+ * | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
67
+ * | RAM sequential read | | N/A | 28.0 GB/s | N/A |
68
+ * | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
69
+ * | City64 | | 64 | 22.0 GB/s | 76.6 |
70
+ * | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
71
+ * | City128 | | 128 | 21.7 GB/s | 57.7 |
72
+ * | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
73
+ * | XXH64() | | 64 | 19.4 GB/s | 71.0 |
74
+ * | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
75
+ * | Mum | | 64 | 18.0 GB/s | 67.0 |
76
+ * | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
77
+ * | XXH32() | | 32 | 9.7 GB/s | 71.9 |
78
+ * | City32 | | 32 | 9.1 GB/s | 66.0 |
79
+ * | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
80
+ * | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
81
+ * | SipHash* | | 64 | 3.0 GB/s | 43.2 |
82
+ * | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
83
+ * | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
84
+ * | FNV64 | | 64 | 1.2 GB/s | 62.7 |
85
+ * | Blake2* | | 256 | 1.1 GB/s | 5.1 |
86
+ * | SHA1* | | 160 | 0.8 GB/s | 5.6 |
87
+ * | MD5* | | 128 | 0.6 GB/s | 7.8 |
88
+ * @note
89
+ * - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
90
+ * even though it is mandatory on x64.
91
+ * - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
92
+ * by modern standards.
93
+ * - Small data velocity is a rough average of algorithm's efficiency for small
94
+ * data. For more accurate information, see the wiki.
95
+ * - More benchmarks and strength tests are found on the wiki:
96
+ * https://github.com/Cyan4973/xxHash/wiki
97
+ *
98
+ * Usage
99
+ * ------
100
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
101
+ * substitution.
102
+ *
103
+ * @pre
104
+ * For functions which take an input and length parameter, the following
105
+ * requirements are assumed:
106
+ * - The range from [`input`, `input + length`) is valid, readable memory.
107
+ * - The only exception is if the `length` is `0`, `input` may be `NULL`.
108
+ * - For C++, the objects must have the *TriviallyCopyable* property, as the
109
+ * functions access bytes directly as if it was an array of `unsigned char`.
110
+ *
111
+ * @anchor single_shot_example
112
+ * **Single Shot**
113
+ *
114
+ * These functions are stateless functions which hash a contiguous block of memory,
115
+ * immediately returning the result. They are the easiest and usually the fastest
116
+ * option.
117
+ *
118
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
119
+ *
120
+ * @code{.c}
121
+ * #include <string.h>
122
+ * #include "xxhash.h"
123
+ *
124
+ * // Example for a function which hashes a null terminated string with XXH32().
125
+ * XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
126
+ * {
127
+ * // NULL pointers are only valid if the length is zero
128
+ * size_t length = (string == NULL) ? 0 : strlen(string);
129
+ * return XXH32(string, length, seed);
130
+ * }
131
+ * @endcode
132
+ *
133
+ * @anchor streaming_example
134
+ * **Streaming**
135
+ *
136
+ * These groups of functions allow incremental hashing of unknown size, even
137
+ * more than what would fit in a size_t.
138
+ *
139
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
140
+ *
141
+ * @code{.c}
142
+ * #include <stdio.h>
143
+ * #include <assert.h>
144
+ * #include "xxhash.h"
145
+ * // Example for a function which hashes a FILE incrementally with XXH3_64bits().
146
+ * XXH64_hash_t hashFile(FILE* f)
147
+ * {
148
+ * // Allocate a state struct. Do not just use malloc() or new.
149
+ * XXH3_state_t* state = XXH3_createState();
150
+ * assert(state != NULL && "Out of memory!");
151
+ * // Reset the state to start a new hashing session.
152
+ * XXH3_64bits_reset(state);
153
+ * char buffer[4096];
154
+ * size_t count;
155
+ * // Read the file in chunks
156
+ * while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
157
+ * // Run update() as many times as necessary to process the data
158
+ * XXH3_64bits_update(state, buffer, count);
159
+ * }
160
+ * // Retrieve the finalized hash. This will not change the state.
161
+ * XXH64_hash_t result = XXH3_64bits_digest(state);
162
+ * // Free the state. Do not use free().
163
+ * XXH3_freeState(state);
164
+ * return result;
165
+ * }
166
+ * @endcode
167
+ *
38
168
  * @file xxhash.h
39
169
  * xxHash prototypes and implementation
40
170
  */
41
- /* TODO: update */
42
- /* Notice extracted from xxHash homepage:
43
-
44
- xxHash is an extremely fast hash algorithm, running at RAM speed limits.
45
- It also successfully passes all tests from the SMHasher suite.
46
-
47
- Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
48
-
49
- Name Speed Q.Score Author
50
- xxHash 5.4 GB/s 10
51
- CrapWow 3.2 GB/s 2 Andrew
52
- MurmurHash 3a 2.7 GB/s 10 Austin Appleby
53
- SpookyHash 2.0 GB/s 10 Bob Jenkins
54
- SBox 1.4 GB/s 9 Bret Mulvey
55
- Lookup3 1.2 GB/s 9 Bob Jenkins
56
- SuperFastHash 1.2 GB/s 1 Paul Hsieh
57
- CityHash64 1.05 GB/s 10 Pike & Alakuijala
58
- FNV 0.55 GB/s 5 Fowler, Noll, Vo
59
- CRC32 0.43 GB/s 9
60
- MD5-32 0.33 GB/s 10 Ronald L. Rivest
61
- SHA1-32 0.28 GB/s 10
62
-
63
- Q.Score is a measure of quality of the hash function.
64
- It depends on successfully passing SMHasher test set.
65
- 10 is a perfect score.
66
-
67
- Note: SMHasher's CRC32 implementation is not the fastest one.
68
- Other speed-oriented implementations can be faster,
69
- especially in combination with PCLMUL instruction:
70
- https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
71
-
72
- A 64-bit version, named XXH64, is available since r35.
73
- It offers much better speed, but for 64-bit applications only.
74
- Name Speed on 64 bits Speed on 32 bits
75
- XXH64 13.8 GB/s 1.9 GB/s
76
- XXH32 6.8 GB/s 6.0 GB/s
77
- */
78
171
 
79
172
  #if defined (__cplusplus)
80
173
  extern "C" {
@@ -84,21 +177,53 @@ extern "C" {
84
177
  * INLINE mode
85
178
  ******************************/
86
179
  /*!
87
- * XXH_INLINE_ALL (and XXH_PRIVATE_API)
180
+ * @defgroup public Public API
181
+ * Contains details on the public xxHash functions.
182
+ * @{
183
+ */
184
+ #ifdef XXH_DOXYGEN
185
+ /*!
186
+ * @brief Exposes the implementation and marks all functions as `inline`.
187
+ *
88
188
  * Use these build macros to inline xxhash into the target unit.
89
189
  * Inlining improves performance on small inputs, especially when the length is
90
190
  * expressed as a compile-time constant:
91
191
  *
92
- * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
192
+ * https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
93
193
  *
94
194
  * It also keeps xxHash symbols private to the unit, so they are not exported.
95
195
  *
96
196
  * Usage:
197
+ * @code{.c}
97
198
  * #define XXH_INLINE_ALL
98
199
  * #include "xxhash.h"
99
- *
200
+ * @endcode
100
201
  * Do not compile and link xxhash.o as a separate object, as it is not useful.
101
202
  */
203
+ # define XXH_INLINE_ALL
204
+ # undef XXH_INLINE_ALL
205
+ /*!
206
+ * @brief Exposes the implementation without marking functions as inline.
207
+ */
208
+ # define XXH_PRIVATE_API
209
+ # undef XXH_PRIVATE_API
210
+ /*!
211
+ * @brief Emulate a namespace by transparently prefixing all symbols.
212
+ *
213
+ * If you want to include _and expose_ xxHash functions from within your own
214
+ * library, but also want to avoid symbol collisions with other libraries which
215
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
216
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
217
+ * (therefore, avoid empty or numeric values).
218
+ *
219
+ * Note that no change is required within the calling program as long as it
220
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
221
+ * by this header.
222
+ */
223
+ # define XXH_NAMESPACE /* YOUR NAME HERE */
224
+ # undef XXH_NAMESPACE
225
+ #endif
226
+
102
227
  #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
103
228
  && !defined(XXH_INLINE_ALL_31684351384)
104
229
  /* this section should be traversed only once */
@@ -157,6 +282,7 @@ extern "C" {
157
282
  # undef XXH3_64bits
158
283
  # undef XXH3_64bits_withSecret
159
284
  # undef XXH3_64bits_withSeed
285
+ # undef XXH3_64bits_withSecretandSeed
160
286
  # undef XXH3_createState
161
287
  # undef XXH3_freeState
162
288
  # undef XXH3_copyState
@@ -174,6 +300,7 @@ extern "C" {
174
300
  # undef XXH3_128bits_reset
175
301
  # undef XXH3_128bits_reset_withSeed
176
302
  # undef XXH3_128bits_reset_withSecret
303
+ # undef XXH3_128bits_reset_withSecretandSeed
177
304
  # undef XXH3_128bits_update
178
305
  # undef XXH3_128bits_digest
179
306
  # undef XXH128_isEqual
@@ -211,21 +338,13 @@ extern "C" {
211
338
  # undef XXHASH_H_STATIC_13879238742
212
339
  #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
213
340
 
214
-
215
-
216
341
  /* ****************************************************************
217
342
  * Stable API
218
343
  *****************************************************************/
219
344
  #ifndef XXHASH_H_5627135585666179
220
345
  #define XXHASH_H_5627135585666179 1
221
346
 
222
-
223
- /*!
224
- * @defgroup public Public API
225
- * Contains details on the public xxHash functions.
226
- * @{
227
- */
228
- /* specific declaration modes for Windows */
347
+ /*! @brief Marks a global symbol. */
229
348
  #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
230
349
  # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
231
350
  # ifdef XXH_EXPORT
@@ -238,24 +357,6 @@ extern "C" {
238
357
  # endif
239
358
  #endif
240
359
 
241
- #ifdef XXH_DOXYGEN
242
- /*!
243
- * @brief Emulate a namespace by transparently prefixing all symbols.
244
- *
245
- * If you want to include _and expose_ xxHash functions from within your own
246
- * library, but also want to avoid symbol collisions with other libraries which
247
- * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
248
- * any public symbol from xxhash library with the value of XXH_NAMESPACE
249
- * (therefore, avoid empty or numeric values).
250
- *
251
- * Note that no change is required within the calling program as long as it
252
- * includes `xxhash.h`: Regular symbol names will be automatically translated
253
- * by this header.
254
- */
255
- # define XXH_NAMESPACE /* YOUR NAME HERE */
256
- # undef XXH_NAMESPACE
257
- #endif
258
-
259
360
  #ifdef XXH_NAMESPACE
260
361
  # define XXH_CAT(A,B) A##B
261
362
  # define XXH_NAME2(A,B) XXH_CAT(A,B)
@@ -284,23 +385,28 @@ extern "C" {
284
385
  # define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
285
386
  # define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
286
387
  # define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
388
+ # define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
287
389
  # define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
288
390
  # define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
289
391
  # define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
290
392
  # define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
291
393
  # define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
292
394
  # define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
395
+ # define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
293
396
  # define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
294
397
  # define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
295
398
  # define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
399
+ # define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
296
400
  /* XXH3_128bits */
297
401
  # define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
298
402
  # define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
299
403
  # define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
300
404
  # define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
405
+ # define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
301
406
  # define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
302
407
  # define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
303
408
  # define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
409
+ # define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
304
410
  # define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
305
411
  # define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
306
412
  # define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
@@ -310,30 +416,64 @@ extern "C" {
310
416
  #endif
311
417
 
312
418
 
419
+ /* *************************************
420
+ * Compiler specifics
421
+ ***************************************/
422
+
423
+ /* specific declaration modes for Windows */
424
+ #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
425
+ # if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
426
+ # ifdef XXH_EXPORT
427
+ # define XXH_PUBLIC_API __declspec(dllexport)
428
+ # elif XXH_IMPORT
429
+ # define XXH_PUBLIC_API __declspec(dllimport)
430
+ # endif
431
+ # else
432
+ # define XXH_PUBLIC_API /* do nothing */
433
+ # endif
434
+ #endif
435
+
436
+ #if defined (__GNUC__)
437
+ # define XXH_CONSTF __attribute__((const))
438
+ # define XXH_PUREF __attribute__((pure))
439
+ # define XXH_MALLOCF __attribute__((malloc))
440
+ #else
441
+ # define XXH_CONSTF /* disable */
442
+ # define XXH_PUREF
443
+ # define XXH_MALLOCF
444
+ #endif
445
+
313
446
  /* *************************************
314
447
  * Version
315
448
  ***************************************/
316
449
  #define XXH_VERSION_MAJOR 0
317
450
  #define XXH_VERSION_MINOR 8
318
451
  #define XXH_VERSION_RELEASE 1
452
+ /*! @brief Version number, encoded as two digits each */
319
453
  #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
320
454
 
321
455
  /*!
322
456
  * @brief Obtains the xxHash version.
323
457
  *
324
- * This is only useful when xxHash is compiled as a shared library, as it is
325
- * independent of the version defined in the header.
458
+ * This is mostly useful when xxHash is compiled as a shared library,
459
+ * since the returned value comes from the library, as opposed to header file.
326
460
  *
327
- * @return `XXH_VERSION_NUMBER` as of when the libray was compiled.
461
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
328
462
  */
329
- XXH_PUBLIC_API unsigned XXH_versionNumber (void);
463
+ XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
330
464
 
331
465
 
332
466
  /* ****************************
333
- * Definitions
467
+ * Common basic types
334
468
  ******************************/
335
469
  #include <stddef.h> /* size_t */
336
- typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
470
+ /*!
471
+ * @brief Exit code for the streaming API.
472
+ */
473
+ typedef enum {
474
+ XXH_OK = 0, /*!< OK */
475
+ XXH_ERROR /*!< Error */
476
+ } XXH_errorcode;
337
477
 
338
478
 
339
479
  /*-**********************************************************************
@@ -357,30 +497,27 @@ typedef uint32_t XXH32_hash_t;
357
497
  # include <limits.h>
358
498
  # if UINT_MAX == 0xFFFFFFFFUL
359
499
  typedef unsigned int XXH32_hash_t;
500
+ # elif ULONG_MAX == 0xFFFFFFFFUL
501
+ typedef unsigned long XXH32_hash_t;
360
502
  # else
361
- # if ULONG_MAX == 0xFFFFFFFFUL
362
- typedef unsigned long XXH32_hash_t;
363
- # else
364
- # error "unsupported platform: need a 32-bit type"
365
- # endif
503
+ # error "unsupported platform: need a 32-bit type"
366
504
  # endif
367
505
  #endif
368
506
 
369
507
  /*!
370
508
  * @}
371
509
  *
372
- * @defgroup xxh32_family XXH32 family
510
+ * @defgroup XXH32_family XXH32 family
373
511
  * @ingroup public
374
512
  * Contains functions used in the classic 32-bit xxHash algorithm.
375
513
  *
376
514
  * @note
377
- * XXH32 is considered rather weak by today's standards.
378
- * The @ref xxh3_family provides competitive speed for both 32-bit and 64-bit
379
- * systems, and offers true 64/128 bit hash results. It provides a superior
380
- * level of dispersion, and greatly reduces the risks of collisions.
515
+ * XXH32 is useful for older platforms, with no or poor 64-bit performance.
516
+ * Note that the @ref XXH3_family provides competitive speed for both 32-bit
517
+ * and 64-bit systems, and offers true 64/128 bit hash results.
381
518
  *
382
- * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
383
- * @see @ref xxh32_impl for implementation details
519
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
520
+ * @see @ref XXH32_impl for implementation details
384
521
  * @{
385
522
  */
386
523
 
@@ -389,6 +526,8 @@ typedef uint32_t XXH32_hash_t;
389
526
  *
390
527
  * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
391
528
  *
529
+ * See @ref single_shot_example "Single Shot Example" for an example.
530
+ *
392
531
  * @param input The block of data to be hashed, at least @p length bytes in size.
393
532
  * @param length The length of @p input, in bytes.
394
533
  * @param seed The 32-bit seed to alter the hash's output predictably.
@@ -406,8 +545,9 @@ typedef uint32_t XXH32_hash_t;
406
545
  * @see
407
546
  * XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
408
547
  */
409
- XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
548
+ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
410
549
 
550
+ #ifndef XXH_NO_STREAM
411
551
  /*!
412
552
  * Streaming functions generate the xxHash value from an incremental input.
413
553
  * This method is slower than single-call functions, due to state management.
@@ -430,32 +570,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_
430
570
  *
431
571
  * When done, release the state using `XXH*_freeState()`.
432
572
  *
433
- * Example code for incrementally hashing a file:
434
- * @code{.c}
435
- * #include <stdio.h>
436
- * #include <xxhash.h>
437
- * #define BUFFER_SIZE 256
438
- *
439
- * // Note: XXH64 and XXH3 use the same interface.
440
- * XXH32_hash_t
441
- * hashFile(FILE* stream)
442
- * {
443
- * XXH32_state_t* state;
444
- * unsigned char buf[BUFFER_SIZE];
445
- * size_t amt;
446
- * XXH32_hash_t hash;
447
- *
448
- * state = XXH32_createState(); // Create a state
449
- * assert(state != NULL); // Error check here
450
- * XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
451
- * while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
452
- * XXH32_update(state, buf, amt); // Hash the file in chunks
453
- * }
454
- * hash = XXH32_digest(state); // Finalize the hash
455
- * XXH32_freeState(state); // Clean up
456
- * return hash;
457
- * }
458
- * @endcode
573
+ * @see streaming_example at the top of @ref xxhash.h for an example.
459
574
  */
460
575
 
461
576
  /*!
@@ -472,7 +587,7 @@ typedef struct XXH32_state_s XXH32_state_t;
472
587
  * Must be freed with XXH32_freeState().
473
588
  * @return An allocated XXH32_state_t on success, `NULL` on failure.
474
589
  */
475
- XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
590
+ XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
476
591
  /*!
477
592
  * @brief Frees an @ref XXH32_state_t.
478
593
  *
@@ -540,7 +655,8 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
540
655
  *
541
656
  * @return The calculated xxHash32 value from that state.
542
657
  */
543
- XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
658
+ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
659
+ #endif /* !XXH_NO_STREAM */
544
660
 
545
661
  /******* Canonical representation *******/
546
662
 
@@ -591,39 +707,40 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
591
707
  *
592
708
  * @return The converted hash.
593
709
  */
594
- XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
710
+ XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
595
711
 
596
712
 
597
- /*
598
- Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
599
- introduced in CPP17 and C23.
600
- CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
601
- C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
602
- */
603
-
604
- #if defined (__has_c_attribute) && defined (__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) /* C2x */
605
- # if __has_c_attribute(fallthrough)
606
- # define XXH_FALLTHROUGH [[fallthrough]]
607
- # endif
713
+ #ifdef __has_attribute
714
+ # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
715
+ #else
716
+ # define XXH_HAS_ATTRIBUTE(x) 0
717
+ #endif
608
718
 
609
- #elif defined(__cplusplus) && defined(__has_cpp_attribute)
610
- # if __has_cpp_attribute(fallthrough)
611
- # define XXH_FALLTHROUGH [[fallthrough]]
612
- # endif
719
+ /* C-language Attributes are added in C23. */
720
+ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
721
+ # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
722
+ #else
723
+ # define XXH_HAS_C_ATTRIBUTE(x) 0
613
724
  #endif
614
725
 
615
- #ifndef XXH_FALLTHROUGH
616
- # if defined(__GNUC__) && __GNUC__ >= 7
617
- # define XXH_FALLTHROUGH __attribute__ ((fallthrough))
618
- # elif defined(__clang__) && (__clang_major__ >= 10) \
619
- && (!defined(__APPLE__) || (__clang_major__ >= 12))
620
- /* Apple clang 12 is effectively clang-10 ,
621
- * see https://en.wikipedia.org/wiki/Xcode for details
622
- */
623
- # define XXH_FALLTHROUGH __attribute__ ((fallthrough))
624
- # else
625
- # define XXH_FALLTHROUGH
626
- # endif
726
+ #if defined(__cplusplus) && defined(__has_cpp_attribute)
727
+ # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
728
+ #else
729
+ # define XXH_HAS_CPP_ATTRIBUTE(x) 0
730
+ #endif
731
+
732
+ /*
733
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
734
+ * introduced in CPP17 and C23.
735
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
736
+ * C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
737
+ */
738
+ #if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
739
+ # define XXH_FALLTHROUGH [[fallthrough]]
740
+ #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
741
+ # define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
742
+ #else
743
+ # define XXH_FALLTHROUGH /* fallthrough */
627
744
  #endif
628
745
 
629
746
  /*!
@@ -662,18 +779,17 @@ typedef uint64_t XXH64_hash_t;
662
779
  /*!
663
780
  * @}
664
781
  *
665
- * @defgroup xxh64_family XXH64 family
782
+ * @defgroup XXH64_family XXH64 family
666
783
  * @ingroup public
667
784
  * @{
668
785
  * Contains functions used in the classic 64-bit xxHash algorithm.
669
786
  *
670
787
  * @note
671
788
  * XXH3 provides competitive speed for both 32-bit and 64-bit systems,
672
- * and offers true 64/128 bit hash results. It provides a superior level of
673
- * dispersion, and greatly reduces the risks of collisions.
789
+ * and offers true 64/128 bit hash results.
790
+ * It provides better speed for systems with vector processing capabilities.
674
791
  */
675
792
 
676
-
677
793
  /*!
678
794
  * @brief Calculates the 64-bit hash of @p input using xxHash64.
679
795
  *
@@ -697,32 +813,35 @@ typedef uint64_t XXH64_hash_t;
697
813
  * @see
698
814
  * XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
699
815
  */
700
- XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
816
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
701
817
 
702
818
  /******* Streaming *******/
819
+ #ifndef XXH_NO_STREAM
703
820
  /*!
704
821
  * @brief The opaque state struct for the XXH64 streaming API.
705
822
  *
706
823
  * @see XXH64_state_s for details.
707
824
  */
708
825
  typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
709
- XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
826
+ XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
710
827
  XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
711
828
  XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
712
829
 
713
830
  XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
714
831
  XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
715
- XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
716
-
832
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
833
+ #endif /* !XXH_NO_STREAM */
717
834
  /******* Canonical representation *******/
718
835
  typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
719
836
  XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
720
- XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
837
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
838
+
839
+ #ifndef XXH_NO_XXH3
721
840
 
722
841
  /*!
723
842
  * @}
724
843
  * ************************************************************************
725
- * @defgroup xxh3_family XXH3 family
844
+ * @defgroup XXH3_family XXH3 family
726
845
  * @ingroup public
727
846
  * @{
728
847
  *
@@ -742,12 +861,14 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
742
861
  *
743
862
  * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
744
863
  * but does not require it.
745
- * Any 32-bit and 64-bit targets that can run XXH32 smoothly
746
- * can run XXH3 at competitive speeds, even without vector support.
747
- * Further details are explained in the implementation.
864
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
865
+ * at competitive speeds, even without vector support. Further details are
866
+ * explained in the implementation.
748
867
  *
749
868
  * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
750
- * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
869
+ * ZVector and scalar targets. This can be controlled via the @ref XXH_VECTOR
870
+ * macro. For the x86 family, an automatic dispatcher is included separately
871
+ * in @ref xxh_x86dispatch.c.
751
872
  *
752
873
  * XXH3 implementation is portable:
753
874
  * it has a generic C90 formulation that can be compiled on any platform,
@@ -763,24 +884,42 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
763
884
  *
764
885
  * The API supports one-shot hashing, streaming mode, and custom secrets.
765
886
  */
766
-
767
887
  /*-**********************************************************************
768
888
  * XXH3 64-bit variant
769
889
  ************************************************************************/
770
890
 
771
- /* XXH3_64bits():
772
- * default 64-bit variant, using default secret and default seed of 0.
773
- * It's the fastest variant. */
774
- XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
891
+ /*!
892
+ * @brief 64-bit unseeded variant of XXH3.
893
+ *
894
+ * This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
895
+ * it may have slightly better performance due to constant propagation of the
896
+ * defaults.
897
+ *
898
+ * @see
899
+ * XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
900
+ * @see
901
+ * XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
902
+ * @see
903
+ * XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
904
+ */
905
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(const void* input, size_t length);
775
906
 
776
- /*
777
- * XXH3_64bits_withSeed():
778
- * This variant generates a custom secret on the fly
779
- * based on default secret altered using the `seed` value.
907
+ /*!
908
+ * @brief 64-bit seeded variant of XXH3
909
+ *
910
+ * This variant generates a custom secret on the fly based on default secret
911
+ * altered using the `seed` value.
912
+ *
780
913
  * While this operation is decently fast, note that it's not completely free.
781
- * Note: seed==0 produces the same results as XXH3_64bits().
914
+ *
915
+ * @note
916
+ * seed == 0 produces the same results as @ref XXH3_64bits().
917
+ *
918
+ * @param input The data to hash
919
+ * @param length The length
920
+ * @param seed The 64-bit seed to alter the state.
782
921
  */
783
- XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
922
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(const void* input, size_t length, XXH64_hash_t seed);
784
923
 
785
924
  /*!
786
925
  * The bare minimum size for a custom secret.
@@ -791,23 +930,29 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
791
930
  */
792
931
  #define XXH3_SECRET_SIZE_MIN 136
793
932
 
794
- /*
795
- * XXH3_64bits_withSecret():
933
+ /*!
934
+ * @brief 64-bit variant of XXH3 with a custom "secret".
935
+ *
796
936
  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
797
937
  * This makes it more difficult for an external actor to prepare an intentional collision.
798
938
  * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
799
- * However, the quality of produced hash values depends on secret's entropy.
800
- * Technically, the secret must look like a bunch of random bytes.
939
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
940
+ * Therefore, the secret _must_ look like a bunch of random bytes.
801
941
  * Avoid "trivial" or structured data such as repeated sequences or a text document.
802
- * Whenever unsure about the "randomness" of the blob of bytes,
803
- * consider relabelling it as a "custom seed" instead,
804
- * and employ "XXH3_generateSecret()" (see below)
805
- * to generate a high entropy secret derived from the custom seed.
942
+ * Whenever in doubt about the "randomness" of the blob of bytes,
943
+ * consider employing "XXH3_generateSecret()" instead (see below).
944
+ * It will generate a proper high entropy secret derived from the blob of bytes.
945
+ * Another advantage of using XXH3_generateSecret() is that
946
+ * it guarantees that all bits within the initial blob of bytes
947
+ * will impact every bit of the output.
948
+ * This is not necessarily the case when using the blob of bytes directly
949
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
806
950
  */
807
- XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
951
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
808
952
 
809
953
 
810
954
  /******* Streaming *******/
955
+ #ifndef XXH_NO_STREAM
811
956
  /*
812
957
  * Streaming requires state maintenance.
813
958
  * This operation costs memory and CPU.
@@ -821,7 +966,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
821
966
  * @see XXH3_state_s for details.
822
967
  */
823
968
  typedef struct XXH3_state_s XXH3_state_t;
824
- XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
969
+ XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
825
970
  XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
826
971
  XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
827
972
 
@@ -837,7 +982,7 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
837
982
  * digest will be equivalent to `XXH3_64bits_withSeed()`.
838
983
  */
839
984
  XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
840
- /*
985
+ /*!
841
986
  * XXH3_64bits_reset_withSecret():
842
987
  * `secret` is referenced, it _must outlive_ the hash streaming session.
843
988
  * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
@@ -849,7 +994,8 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr,
849
994
  XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
850
995
 
851
996
  XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
852
- XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
997
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
998
+ #endif /* !XXH_NO_STREAM */
853
999
 
854
1000
  /* note : canonical representation of XXH3 is the same as XXH64
855
1001
  * since they both produce XXH64_hash_t values */
@@ -870,11 +1016,31 @@ typedef struct {
870
1016
  XXH64_hash_t high64; /*!< `value >> 64` */
871
1017
  } XXH128_hash_t;
872
1018
 
873
- XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
874
- XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
875
- XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
1019
+ /*!
1020
+ * @brief Unseeded 128-bit variant of XXH3
1021
+ *
1022
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
1023
+ * for shorter inputs.
1024
+ *
1025
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
1026
+ * it may have slightly better performance due to constant propagation of the
1027
+ * defaults.
1028
+ *
1029
+ * @see
1030
+ * XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
1031
+ * @see
1032
+ * XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
1033
+ * @see
1034
+ * XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
1035
+ */
1036
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(const void* data, size_t len);
1037
+ /*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
1038
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
1039
+ /*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
1040
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
876
1041
 
877
1042
  /******* Streaming *******/
1043
+ #ifndef XXH_NO_STREAM
878
1044
  /*
879
1045
  * Streaming requires state maintenance.
880
1046
  * This operation costs memory and CPU.
@@ -892,7 +1058,8 @@ XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr,
892
1058
  XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
893
1059
 
894
1060
  XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
895
- XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
1061
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
1062
+ #endif /* !XXH_NO_STREAM */
896
1063
 
897
1064
  /* Following helper functions make it possible to compare XXH128_hast_t values.
898
1065
  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
@@ -902,26 +1069,26 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
902
1069
  * XXH128_isEqual():
903
1070
  * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
904
1071
  */
905
- XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
1072
+ XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
906
1073
 
907
1074
  /*!
908
- * XXH128_cmp():
909
- *
1075
+ * @brief Compares two @ref XXH128_hash_t
910
1076
  * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
911
1077
  *
912
- * return: >0 if *h128_1 > *h128_2
913
- * =0 if *h128_1 == *h128_2
914
- * <0 if *h128_1 < *h128_2
1078
+ * @return: >0 if *h128_1 > *h128_2
1079
+ * =0 if *h128_1 == *h128_2
1080
+ * <0 if *h128_1 < *h128_2
915
1081
  */
916
- XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
1082
+ XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(const void* h128_1, const void* h128_2);
917
1083
 
918
1084
 
919
1085
  /******* Canonical representation *******/
920
1086
  typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
921
1087
  XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
922
- XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
1088
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
923
1089
 
924
1090
 
1091
+ #endif /* !XXH_NO_XXH3 */
925
1092
  #endif /* XXH_NO_LONG_LONG */
926
1093
 
927
1094
  /*!
@@ -962,13 +1129,10 @@ XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t*
962
1129
  struct XXH32_state_s {
963
1130
  XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
964
1131
  XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
965
- XXH32_hash_t v1; /*!< First accumulator lane */
966
- XXH32_hash_t v2; /*!< Second accumulator lane */
967
- XXH32_hash_t v3; /*!< Third accumulator lane */
968
- XXH32_hash_t v4; /*!< Fourth accumulator lane */
1132
+ XXH32_hash_t v[4]; /*!< Accumulator lanes */
969
1133
  XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
970
1134
  XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
971
- XXH32_hash_t reserved; /*!< Reserved field. Do not read or write to it, it may be removed. */
1135
+ XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
972
1136
  }; /* typedef'd to XXH32_state_t */
973
1137
 
974
1138
 
@@ -988,16 +1152,15 @@ struct XXH32_state_s {
988
1152
  */
989
1153
  struct XXH64_state_s {
990
1154
  XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
991
- XXH64_hash_t v1; /*!< First accumulator lane */
992
- XXH64_hash_t v2; /*!< Second accumulator lane */
993
- XXH64_hash_t v3; /*!< Third accumulator lane */
994
- XXH64_hash_t v4; /*!< Fourth accumulator lane */
1155
+ XXH64_hash_t v[4]; /*!< Accumulator lanes */
995
1156
  XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
996
1157
  XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
997
1158
  XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/
998
- XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it, it may be removed. */
1159
+ XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
999
1160
  }; /* typedef'd to XXH64_state_t */
1000
1161
 
1162
+ #ifndef XXH_NO_XXH3
1163
+
1001
1164
  #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
1002
1165
  # include <stdalign.h>
1003
1166
  # define XXH_ALIGN(n) alignas(n)
@@ -1063,14 +1226,14 @@ struct XXH64_state_s {
1063
1226
  */
1064
1227
  struct XXH3_state_s {
1065
1228
  XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
1066
- /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
1229
+ /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
1067
1230
  XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
1068
1231
  /*!< Used to store a custom secret generated from a seed. */
1069
1232
  XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
1070
1233
  /*!< The internal buffer. @see XXH32_state_s::mem32 */
1071
1234
  XXH32_hash_t bufferedSize;
1072
1235
  /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
1073
- XXH32_hash_t reserved32;
1236
+ XXH32_hash_t useSeed;
1074
1237
  /*!< Reserved field. Needed for padding on 64-bit. */
1075
1238
  size_t nbStripesSoFar;
1076
1239
  /*!< Number or stripes processed. */
@@ -1106,45 +1269,156 @@ struct XXH3_state_s {
1106
1269
  #define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }
1107
1270
 
1108
1271
 
1272
+ /*!
1273
+ * simple alias to pre-selected XXH3_128bits variant
1274
+ */
1275
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
1276
+
1277
+
1109
1278
  /* === Experimental API === */
1110
1279
  /* Symbols defined below must be considered tied to a specific library version. */
1111
1280
 
1112
- /*
1281
+ /*!
1113
1282
  * XXH3_generateSecret():
1114
1283
  *
1115
1284
  * Derive a high-entropy secret from any user-defined content, named customSeed.
1116
1285
  * The generated secret can be used in combination with `*_withSecret()` functions.
1117
- * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
1118
- * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
1286
+ * The `_withSecret()` variants are useful to provide a higher level of protection
1287
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
1288
+ * guess how to impact the calculation logic.
1119
1289
  *
1120
1290
  * The function accepts as input a custom seed of any length and any content,
1121
- * and derives from it a high-entropy secret of length XXH3_SECRET_DEFAULT_SIZE
1122
- * into an already allocated buffer secretBuffer.
1123
- * The generated secret is _always_ XXH_SECRET_DEFAULT_SIZE bytes long.
1291
+ * and derives from it a high-entropy secret of length @p secretSize into an
1292
+ * already allocated buffer @p secretBuffer.
1124
1293
  *
1125
1294
  * The generated secret can then be used with any `*_withSecret()` variant.
1126
- * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
1127
- * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
1295
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
1296
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
1128
1297
  * are part of this list. They all accept a `secret` parameter
1129
- * which must be very long for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
1298
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
1130
1299
  * _and_ feature very high entropy (consist of random-looking bytes).
1131
- * These conditions can be a high bar to meet, so
1132
- * this function can be used to generate a secret of proper quality.
1300
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
1301
+ * be employed to ensure proper quality.
1133
1302
  *
1134
- * customSeed can be anything. It can have any size, even small ones,
1135
- * and its content can be anything, even stupidly "low entropy" source such as a bunch of zeroes.
1136
- * The resulting `secret` will nonetheless provide all expected qualities.
1303
+ * @p customSeed can be anything. It can have any size, even small ones,
1304
+ * and its content can be anything, even "poor entropy" sources such as a bunch
1305
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
1306
+ *
1307
+ * @pre
1308
+ * - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
1309
+ * - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
1137
1310
  *
1138
- * Supplying NULL as the customSeed copies the default secret into `secretBuffer`.
1139
- * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
1311
+ * Example code:
1312
+ * @code{.c}
1313
+ * #include <stdio.h>
1314
+ * #include <stdlib.h>
1315
+ * #include <string.h>
1316
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
1317
+ * #include "xxhash.h"
1318
+ * // Hashes argv[2] using the entropy from argv[1].
1319
+ * int main(int argc, char* argv[])
1320
+ * {
1321
+ * char secret[XXH3_SECRET_SIZE_MIN];
1322
+ * if (argv != 3) { return 1; }
1323
+ * XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
1324
+ * XXH64_hash_t h = XXH3_64bits_withSecret(
1325
+ * argv[2], strlen(argv[2]),
1326
+ * secret, sizeof(secret)
1327
+ * );
1328
+ * printf("%016llx\n", (unsigned long long) h);
1329
+ * }
1330
+ * @endcode
1140
1331
  */
1141
- XXH_PUBLIC_API void XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize);
1142
-
1332
+ XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
1143
1333
 
1144
- /* simple short-cut to pre-selected XXH3_128bits variant */
1145
- XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
1334
+ /*!
1335
+ * @brief Generate the same secret as the _withSeed() variants.
1336
+ *
1337
+ * The generated secret can be used in combination with
1338
+ *`*_withSecret()` and `_withSecretandSeed()` variants.
1339
+ *
1340
+ * Example C++ `std::string` hash class:
1341
+ * @code{.cpp}
1342
+ * #include <string>
1343
+ * #define XXH_STATIC_LINKING_ONLY // expose unstable API
1344
+ * #include "xxhash.h"
1345
+ * // Slow, seeds each time
1346
+ * class HashSlow {
1347
+ * XXH64_hash_t seed;
1348
+ * public:
1349
+ * HashSlow(XXH64_hash_t s) : seed{s} {}
1350
+ * size_t operator()(const std::string& x) const {
1351
+ * return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
1352
+ * }
1353
+ * };
1354
+ * // Fast, caches the seeded secret for future uses.
1355
+ * class HashFast {
1356
+ * unsigned char secret[XXH3_SECRET_SIZE_MIN];
1357
+ * public:
1358
+ * HashFast(XXH64_hash_t s) {
1359
+ * XXH3_generateSecret_fromSeed(secret, seed);
1360
+ * }
1361
+ * size_t operator()(const std::string& x) const {
1362
+ * return size_t{
1363
+ * XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
1364
+ * };
1365
+ * }
1366
+ * };
1367
+ * @endcode
1368
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
1369
+ * @param seed The seed to seed the state.
1370
+ */
1371
+ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
1146
1372
 
1373
+ /*!
1374
+ * These variants generate hash values using either
1375
+ * @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
1376
+ * or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
1377
+ *
1378
+ * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
1379
+ * `_withSeed()` has to generate the secret on the fly for "large" keys.
1380
+ * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
1381
+ * `_withSecret()` has to generate the masks on the fly for "small" keys,
1382
+ * which requires more instructions than _withSeed() variants.
1383
+ * Therefore, _withSecretandSeed variant combines the best of both worlds.
1384
+ *
1385
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
1386
+ * this variant produces *exactly* the same results as `_withSeed()` variant,
1387
+ * hence offering only a pure speed benefit on "large" input,
1388
+ * by skipping the need to regenerate the secret for every large input.
1389
+ *
1390
+ * Another usage scenario is to hash the secret to a 64-bit hash value,
1391
+ * for example with XXH3_64bits(), which then becomes the seed,
1392
+ * and then employ both the seed and the secret in _withSecretandSeed().
1393
+ * On top of speed, an added benefit is that each bit in the secret
1394
+ * has a 50% chance to swap each bit in the output, via its impact to the seed.
1395
+ *
1396
+ * This is not guaranteed when using the secret directly in "small data" scenarios,
1397
+ * because only portions of the secret are employed for small data.
1398
+ */
1399
+ XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
1400
+ XXH3_64bits_withSecretandSeed(const void* data, size_t len,
1401
+ const void* secret, size_t secretSize,
1402
+ XXH64_hash_t seed);
1403
+ /*! @copydoc XXH3_64bits_withSecretandSeed() */
1404
+ XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
1405
+ XXH3_128bits_withSecretandSeed(const void* input, size_t length,
1406
+ const void* secret, size_t secretSize,
1407
+ XXH64_hash_t seed64);
1408
+ #ifndef XXH_NO_STREAM
1409
+ /*! @copydoc XXH3_64bits_withSecretandSeed() */
1410
+ XXH_PUBLIC_API XXH_errorcode
1411
+ XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
1412
+ const void* secret, size_t secretSize,
1413
+ XXH64_hash_t seed64);
1414
+ /*! @copydoc XXH3_64bits_withSecretandSeed() */
1415
+ XXH_PUBLIC_API XXH_errorcode
1416
+ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
1417
+ const void* secret, size_t secretSize,
1418
+ XXH64_hash_t seed64);
1419
+ #endif /* !XXH_NO_STREAM */
1147
1420
 
1421
+ #endif /* !XXH_NO_XXH3 */
1148
1422
  #endif /* XXH_NO_LONG_LONG */
1149
1423
  #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
1150
1424
  # define XXH_IMPLEMENTATION
@@ -1198,7 +1472,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1198
1472
  /*!
1199
1473
  * @brief Define this to disable 64-bit code.
1200
1474
  *
1201
- * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
1475
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
1202
1476
  */
1203
1477
  # define XXH_NO_LONG_LONG
1204
1478
  # undef XXH_NO_LONG_LONG /* don't actually */
@@ -1221,7 +1495,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1221
1495
  * Use `memcpy()`. Safe and portable. Note that most modern compilers will
1222
1496
  * eliminate the function call and treat it as an unaligned access.
1223
1497
  *
1224
- * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
1498
+ * - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
1225
1499
  * @par
1226
1500
  * Depends on compiler extensions and is therefore not portable.
1227
1501
  * This method is safe _if_ your compiler supports it,
@@ -1248,22 +1522,40 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1248
1522
  * care, as what works on one compiler/platform/optimization level may cause
1249
1523
  * another to read garbage data or even crash.
1250
1524
  *
1251
- * See https://stackoverflow.com/a/32095106/646947 for details.
1525
+ * See http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
1252
1526
  *
1253
1527
  * Prefer these methods in priority order (0 > 3 > 1 > 2)
1254
1528
  */
1255
1529
  # define XXH_FORCE_MEMORY_ACCESS 0
1530
+
1256
1531
  /*!
1257
- * @def XXH_ACCEPT_NULL_INPUT_POINTER
1258
- * @brief Whether to add explicit `NULL` checks.
1532
+ * @def XXH_SIZE_OPT
1533
+ * @brief Controls how much xxHash optimizes for size.
1259
1534
  *
1260
- * If the input pointer is `NULL` and the length is non-zero, xxHash's default
1261
- * behavior is to dereference it, triggering a segfault.
1535
+ * xxHash, when compiled, tends to result in a rather large binary size. This
1536
+ * is mostly due to heavy usage to forced inlining and constant folding of the
1537
+ * @ref XXH3_family to increase performance.
1262
1538
  *
1263
- * When this macro is enabled, xxHash actively checks the input for a null pointer.
1264
- * If it is, the result for null input pointers is the same as a zero-length input.
1539
+ * However, some developers prefer size over speed. This option can
1540
+ * significantly reduce the size of the generated code. When using the `-Os`
1541
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
1542
+ * otherwise it is defined to 0.
1543
+ *
1544
+ * Most of these size optimizations can be controlled manually.
1545
+ *
1546
+ * This is a number from 0-2.
1547
+ * - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
1548
+ * comes first.
1549
+ * - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
1550
+ * conservative and disables hacks that increase code size. It implies the
1551
+ * options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
1552
+ * and @ref XXH3_NEON_LANES == 8 if they are not already defined.
1553
+ * - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
1554
+ * Performance may cry. For example, the single shot functions just use the
1555
+ * streaming API.
1265
1556
  */
1266
- # define XXH_ACCEPT_NULL_INPUT_POINTER 0
1557
+ # define XXH_SIZE_OPT 0
1558
+
1267
1559
  /*!
1268
1560
  * @def XXH_FORCE_ALIGN_CHECK
1269
1561
  * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
@@ -1285,9 +1577,11 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1285
1577
  *
1286
1578
  * In these cases, the alignment check can be removed by setting this macro to 0.
1287
1579
  * Then the code will always use unaligned memory access.
1288
- * Align check is automatically disabled on x86, x64 & arm64,
1580
+ * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
1289
1581
  * which are platforms known to offer good unaligned memory accesses performance.
1290
1582
  *
1583
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
1584
+ *
1291
1585
  * This option does not affect XXH3 (only XXH32 and XXH64).
1292
1586
  */
1293
1587
  # define XXH_FORCE_ALIGN_CHECK 0
@@ -1309,24 +1603,22 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1309
1603
  * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
1310
1604
  * compiler full control on whether to inline or not.
1311
1605
  *
1312
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
1313
- * -fno-inline with GCC or Clang, this will automatically be defined.
1606
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
1607
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
1314
1608
  */
1315
1609
  # define XXH_NO_INLINE_HINTS 0
1316
1610
 
1317
1611
  /*!
1318
- * @def XXH_REROLL
1319
- * @brief Whether to reroll `XXH32_finalize`.
1612
+ * @def XXH32_ENDJMP
1613
+ * @brief Whether to use a jump for `XXH32_finalize`.
1320
1614
  *
1321
- * For performance, `XXH32_finalize` uses an unrolled loop
1322
- * in the form of a switch statement.
1615
+ * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
1616
+ * This is generally the faster option,
1617
+ * but depending on the exact architecture, a jmp may be preferable.
1323
1618
  *
1324
- * This is not always desirable, as it generates larger code,
1325
- * and depending on the architecture, may even be slower
1326
- *
1327
- * This is automatically defined with `-Os`/`-Oz` on GCC and Clang.
1619
+ * This setting can only make a difference for very small inputs.
1328
1620
  */
1329
- # define XXH_REROLL 0
1621
+ # define XXH32_ENDJMP 0
1330
1622
 
1331
1623
  /*!
1332
1624
  * @internal
@@ -1337,38 +1629,45 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1337
1629
  */
1338
1630
  # define XXH_OLD_NAMES
1339
1631
  # undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
1632
+
1633
+ /*!
1634
+ * @def XXH_NO_STREAM
1635
+ * @brief Disables the streaming API.
1636
+ *
1637
+ * When xxHash is not inlined and the streaming functions are not used, disabling
1638
+ * the streaming functions can improve code size significantly, especially with
1639
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
1640
+ */
1641
+ # define XXH_NO_STREAM
1642
+ # undef XXH_NO_STREAM /* don't actually define it */
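
A sketch of a minimal, stream-free build combining the two knobs above; only the one-shot entry points remain:

    #define XXH_NO_STREAM  /* compiles out XXH*_createState()/update()/digest() */
    #define XXH_SIZE_OPT 2
    #include "xxhash.h"
    /* XXH32(), XXH64(), XXH3_64bits() etc. stay available */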
1340
1643
  #endif /* XXH_DOXYGEN */
1341
1644
  /*!
1342
1645
  * @}
1343
1646
  */
1344
1647
 
1345
1648
  #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
1346
- /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
1347
- # if !defined(__clang__) && \
1348
- ( \
1349
- (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
1350
- ( \
1351
- defined(__GNUC__) && ( \
1352
- (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
1353
- ( \
1354
- defined(__mips__) && \
1355
- (__mips <= 5 || __mips_isa_rev < 6) && \
1356
- (!defined(__mips16) || defined(__mips_mips16e2)) \
1357
- ) \
1358
- ) \
1359
- ) \
1360
- )
1649
+ /* prefer __packed__ structures (method 1) for GCC
1650
+ * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
1651
+ * which for some reason does unaligned loads. */
1652
+ # if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
1361
1653
  # define XXH_FORCE_MEMORY_ACCESS 1
1362
1654
  # endif
1363
1655
  #endif
1364
1656
 
1365
- #ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
1366
- # define XXH_ACCEPT_NULL_INPUT_POINTER 0
1657
+ #ifndef XXH_SIZE_OPT
1658
+ /* default to 1 for -Os or -Oz */
1659
+ # if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
1660
+ # define XXH_SIZE_OPT 1
1661
+ # else
1662
+ # define XXH_SIZE_OPT 0
1663
+ # endif
1367
1664
  #endif
1368
1665
 
1369
1666
  #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
1370
- # if defined(__i386) || defined(__x86_64__) || defined(__aarch64__) \
1371
- || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) /* visual */
1667
+ /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
1668
+ # if XXH_SIZE_OPT >= 1 || \
1669
+ defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
1670
+ || defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) /* visual */
1372
1671
  # define XXH_FORCE_ALIGN_CHECK 0
1373
1672
  # else
1374
1673
  # define XXH_FORCE_ALIGN_CHECK 1
@@ -1376,22 +1675,16 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1376
1675
  #endif
1377
1676
 
1378
1677
  #ifndef XXH_NO_INLINE_HINTS
1379
- # if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
1380
- || defined(__NO_INLINE__) /* -O0, -fno-inline */
1678
+ # if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
1381
1679
  # define XXH_NO_INLINE_HINTS 1
1382
1680
  # else
1383
1681
  # define XXH_NO_INLINE_HINTS 0
1384
1682
  # endif
1385
1683
  #endif
1386
1684
 
1387
- #ifndef XXH_REROLL
1388
- # if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ || \
1389
- (defined(__GNUC__) && !defined(__clang__))
1390
- /* The if/then loop is preferable to switch/case on gcc (on x64) */
1391
- # define XXH_REROLL 1
1392
- # else
1393
- # define XXH_REROLL 0
1394
- # endif
1685
+ #ifndef XXH32_ENDJMP
1686
+ /* generally preferable for performance */
1687
+ # define XXH32_ENDJMP 0
1395
1688
  #endif
1396
1689
 
1397
1690
  /*!
@@ -1403,6 +1696,24 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1403
1696
  /* *************************************
1404
1697
  * Includes & Memory related functions
1405
1698
  ***************************************/
1699
+ #if defined(XXH_NO_STREAM)
1700
+ /* nothing */
1701
+ #elif defined(XXH_NO_STDLIB)
1702
+
1703
+ /* When requested to avoid any dependency on the stdlib,
1704
+ * the library loses the ability to invoke malloc / free.
1705
+ * In practice, it means that functions like `XXH*_createState()`
1706
+ * will always fail, and return NULL.
1707
+ * This flag is useful in situations where
1708
+ * xxhash.h is integrated into some kernel, embedded or limited environment
1709
+ * without access to dynamic allocation.
1710
+ */
1711
+
1712
+ static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
1713
+ static void XXH_free(void* p) { (void)p; }
1714
+
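
Consequently, code built with XXH_NO_STDLIB must treat state allocation as fallible; a minimal sketch:

    XXH32_state_t* const st = XXH32_createState();
    if (st == NULL) {
        /* expected under XXH_NO_STDLIB: the stub XXH_malloc() above
         * always returns NULL */
    }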
1715
+ #else
1716
+
1406
1717
  /*
1407
1718
  * Modify the local functions below should you wish to use
1408
1719
  * different memory routines for malloc() and free()
@@ -1413,7 +1724,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
1413
1724
  * @internal
1414
1725
  * @brief Modify this function to use a different routine than malloc().
1415
1726
  */
1416
- static void* XXH_malloc(size_t s) { return malloc(s); }
1727
+ static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
1417
1728
 
1418
1729
  /*!
1419
1730
  * @internal
@@ -1421,6 +1732,8 @@ static void* XXH_malloc(size_t s) { return malloc(s); }
1421
1732
  */
1422
1733
  static void XXH_free(void* p) { free(p); }
1423
1734
 
1735
+ #endif /* XXH_NO_STDLIB */
1736
+
1424
1737
  #include <string.h>
1425
1738
 
1426
1739
  /*!
@@ -1443,19 +1756,19 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
1443
1756
  #endif
1444
1757
 
1445
1758
  #if XXH_NO_INLINE_HINTS /* disable inlining hints */
1446
- # if defined(__GNUC__)
1759
+ # if defined(__GNUC__) || defined(__clang__)
1447
1760
  # define XXH_FORCE_INLINE static __attribute__((unused))
1448
1761
  # else
1449
1762
  # define XXH_FORCE_INLINE static
1450
1763
  # endif
1451
1764
  # define XXH_NO_INLINE static
1452
1765
  /* enable inlining hints */
1766
+ #elif defined(__GNUC__) || defined(__clang__)
1767
+ # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1768
+ # define XXH_NO_INLINE static __attribute__((noinline))
1453
1769
  #elif defined(_MSC_VER) /* Visual Studio */
1454
1770
  # define XXH_FORCE_INLINE static __forceinline
1455
1771
  # define XXH_NO_INLINE static __declspec(noinline)
1456
- #elif defined(__GNUC__)
1457
- # define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
1458
- # define XXH_NO_INLINE static __attribute__((noinline))
1459
1772
  #elif defined (__cplusplus) \
1460
1773
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
1461
1774
  # define XXH_FORCE_INLINE static inline
@@ -1496,8 +1809,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
1496
1809
  /* note: use after variable declarations */
1497
1810
  #ifndef XXH_STATIC_ASSERT
1498
1811
  # if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
1499
- # include <assert.h>
1500
- # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1812
+ # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
1501
1813
  # elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
1502
1814
  # define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
1503
1815
  # else
@@ -1522,7 +1834,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
1522
1834
  * We also use it to prevent unwanted constant folding for AArch64 in
1523
1835
  * XXH3_initCustomSecret_scalar().
1524
1836
  */
1525
- #ifdef __GNUC__
1837
+ #if defined(__GNUC__) || defined(__clang__)
1526
1838
  # define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
1527
1839
  #else
1528
1840
  # define XXH_COMPILER_GUARD(var) ((void)0)
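
A short illustration of why the GCC/Clang form works: the empty asm claims to read and write the variable, so the optimizer must materialize it in a register and can no longer fold it away (values here are arbitrary):

    xxh_u64 x = 123;        /* a compile-time constant...            */
    XXH_COMPILER_GUARD(x);  /* ...now opaque to the optimizer        */
    xxh_u64 y = x * 2;      /* can no longer be folded to 246        */
    (void)y;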
@@ -1615,30 +1927,31 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
1615
1927
  #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
1616
1928
 
1617
1929
  /*
1618
- * __pack instructions are safer but compiler specific, hence potentially
1619
- * problematic for some compilers.
1620
- *
1621
- * Currently only defined for GCC and ICC.
1930
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
1931
+ * documentation claimed that it only increased the alignment, but actually it
1932
+ * can decrease it on gcc, clang, and icc:
1933
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
1934
+ * https://gcc.godbolt.org/z/xYez1j67Y.
1622
1935
  */
1623
1936
  #ifdef XXH_OLD_NAMES
1624
1937
  typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
1625
1938
  #endif
1626
1939
  static xxh_u32 XXH_read32(const void* ptr)
1627
1940
  {
1628
- typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
1629
- return ((const xxh_unalign*)ptr)->u32;
1941
+ typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
1942
+ return *((const xxh_unalign32*)ptr);
1630
1943
  }
1631
1944
 
1632
1945
  #else
1633
1946
 
1634
1947
  /*
1635
1948
  * Portable and safe solution. Generally efficient.
1636
- * see: https://stackoverflow.com/a/32095106/646947
1949
+ * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
1637
1950
  */
1638
1951
  static xxh_u32 XXH_read32(const void* memPtr)
1639
1952
  {
1640
1953
  xxh_u32 val;
1641
- memcpy(&val, memPtr, sizeof(val));
1954
+ XXH_memcpy(&val, memPtr, sizeof(val));
1642
1955
  return val;
1643
1956
  }
1644
1957
 
@@ -1831,8 +2144,10 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
1831
2144
  *********************************************************************/
1832
2145
  /*!
1833
2146
  * @}
1834
- * @defgroup xxh32_impl XXH32 implementation
2147
+ * @defgroup XXH32_impl XXH32 implementation
1835
2148
  * @ingroup impl
2149
+ *
2150
+ * Details on the XXH32 implementation.
1836
2151
  * @{
1837
2152
  */
1838
2153
  /* #define instead of static const, to be used as initializers */
@@ -1912,17 +2227,17 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
1912
2227
  * The final mix ensures that all input bits have a chance to impact any bit in
1913
2228
  * the output digest, resulting in an unbiased distribution.
1914
2229
  *
1915
- * @param h32 The hash to avalanche.
2230
+ * @param hash The hash to avalanche.
1916
2231
  * @return The avalanched hash.
1917
2232
  */
1918
- static xxh_u32 XXH32_avalanche(xxh_u32 h32)
2233
+ static xxh_u32 XXH32_avalanche(xxh_u32 hash)
1919
2234
  {
1920
- h32 ^= h32 >> 15;
1921
- h32 *= XXH_PRIME32_2;
1922
- h32 ^= h32 >> 13;
1923
- h32 *= XXH_PRIME32_3;
1924
- h32 ^= h32 >> 16;
1925
- return(h32);
2235
+ hash ^= hash >> 15;
2236
+ hash *= XXH_PRIME32_2;
2237
+ hash ^= hash >> 13;
2238
+ hash *= XXH_PRIME32_3;
2239
+ hash ^= hash >> 16;
2240
+ return hash;
1926
2241
  }
1927
2242
 
1928
2243
  #define XXH_get32bits(p) XXH_readLE32_align(p, align)
@@ -1935,28 +2250,31 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32)
1935
2250
  * This final stage will digest them to ensure that all input bytes are present
1936
2251
  * in the final mix.
1937
2252
  *
1938
- * @param h32 The hash to finalize.
2253
+ * @param hash The hash to finalize.
1939
2254
  * @param ptr The pointer to the remaining input.
1940
2255
  * @param len The remaining length, modulo 16.
1941
2256
  * @param align Whether @p ptr is aligned.
1942
2257
  * @return The finalized hash.
2258
+ * @see XXH64_finalize().
1943
2259
  */
1944
- static xxh_u32
1945
- XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
2260
+ static XXH_PUREF xxh_u32
2261
+ XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
1946
2262
  {
1947
- #define XXH_PROCESS1 do { \
1948
- h32 += (*ptr++) * XXH_PRIME32_5; \
1949
- h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1; \
2263
+ #define XXH_PROCESS1 do { \
2264
+ hash += (*ptr++) * XXH_PRIME32_5; \
2265
+ hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
1950
2266
  } while (0)
1951
2267
 
1952
- #define XXH_PROCESS4 do { \
1953
- h32 += XXH_get32bits(ptr) * XXH_PRIME32_3; \
1954
- ptr += 4; \
1955
- h32 = XXH_rotl32(h32, 17) * XXH_PRIME32_4; \
2268
+ #define XXH_PROCESS4 do { \
2269
+ hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
2270
+ ptr += 4; \
2271
+ hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
1956
2272
  } while (0)
1957
2273
 
1958
- /* Compact rerolled version */
1959
- if (XXH_REROLL) {
2274
+ if (ptr==NULL) XXH_ASSERT(len == 0);
2275
+
2276
+ /* Compact rerolled version; generally faster */
2277
+ if (!XXH32_ENDJMP) {
1960
2278
  len &= 15;
1961
2279
  while (len >= 4) {
1962
2280
  XXH_PROCESS4;
@@ -1966,7 +2284,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
1966
2284
  XXH_PROCESS1;
1967
2285
  --len;
1968
2286
  }
1969
- return XXH32_avalanche(h32);
2287
+ return XXH32_avalanche(hash);
1970
2288
  } else {
1971
2289
  switch(len&15) /* or switch(bEnd - p) */ {
1972
2290
  case 12: XXH_PROCESS4;
@@ -1974,7 +2292,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
1974
2292
  case 8: XXH_PROCESS4;
1975
2293
  XXH_FALLTHROUGH;
1976
2294
  case 4: XXH_PROCESS4;
1977
- return XXH32_avalanche(h32);
2295
+ return XXH32_avalanche(hash);
1978
2296
 
1979
2297
  case 13: XXH_PROCESS4;
1980
2298
  XXH_FALLTHROUGH;
@@ -1982,7 +2300,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
1982
2300
  XXH_FALLTHROUGH;
1983
2301
  case 5: XXH_PROCESS4;
1984
2302
  XXH_PROCESS1;
1985
- return XXH32_avalanche(h32);
2303
+ return XXH32_avalanche(hash);
1986
2304
 
1987
2305
  case 14: XXH_PROCESS4;
1988
2306
  XXH_FALLTHROUGH;
@@ -1991,7 +2309,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
1991
2309
  case 6: XXH_PROCESS4;
1992
2310
  XXH_PROCESS1;
1993
2311
  XXH_PROCESS1;
1994
- return XXH32_avalanche(h32);
2312
+ return XXH32_avalanche(hash);
1995
2313
 
1996
2314
  case 15: XXH_PROCESS4;
1997
2315
  XXH_FALLTHROUGH;
@@ -2005,10 +2323,10 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
2005
2323
  XXH_FALLTHROUGH;
2006
2324
  case 1: XXH_PROCESS1;
2007
2325
  XXH_FALLTHROUGH;
2008
- case 0: return XXH32_avalanche(h32);
2326
+ case 0: return XXH32_avalanche(hash);
2009
2327
  }
2010
2328
  XXH_ASSERT(0);
2011
- return h32; /* reaching this point is deemed impossible */
2329
+ return hash; /* reaching this point is deemed impossible */
2012
2330
  }
2013
2331
  }
2014
2332
 
@@ -2024,24 +2342,19 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
2024
2342
  * @internal
2025
2343
  * @brief The implementation for @ref XXH32().
2026
2344
  *
2027
- * @param input, len, seed Directly passed from @ref XXH32().
2345
+ * @param input , len , seed Directly passed from @ref XXH32().
2028
2346
  * @param align Whether @p input is aligned.
2029
2347
  * @return The calculated hash.
2030
2348
  */
2031
- XXH_FORCE_INLINE xxh_u32
2349
+ XXH_FORCE_INLINE XXH_PUREF xxh_u32
2032
2350
  XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
2033
2351
  {
2034
- const xxh_u8* bEnd = input ? input + len : NULL;
2035
2352
  xxh_u32 h32;
2036
2353
 
2037
- #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
2038
- if (input==NULL) {
2039
- len=0;
2040
- bEnd=input=(const xxh_u8*)(size_t)16;
2041
- }
2042
- #endif
2354
+ if (input==NULL) XXH_ASSERT(len == 0);
2043
2355
 
2044
2356
  if (len>=16) {
2357
+ const xxh_u8* const bEnd = input + len;
2045
2358
  const xxh_u8* const limit = bEnd - 15;
2046
2359
  xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2047
2360
  xxh_u32 v2 = seed + XXH_PRIME32_2;
@@ -2066,10 +2379,10 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
2066
2379
  return XXH32_finalize(h32, input, len&15, align);
2067
2380
  }
2068
2381
 
2069
- /*! @ingroup xxh32_family */
2382
+ /*! @ingroup XXH32_family */
2070
2383
  XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
2071
2384
  {
2072
- #if 0
2385
+ #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2073
2386
  /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2074
2387
  XXH32_state_t state;
2075
2388
  XXH32_reset(&state, seed);
@@ -2088,51 +2401,46 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s
2088
2401
 
2089
2402
 
2090
2403
  /******* Hash streaming *******/
2091
- /*!
2092
- * @ingroup xxh32_family
2093
- */
2404
+ #ifndef XXH_NO_STREAM
2405
+ /*! @ingroup XXH32_family */
2094
2406
  XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
2095
2407
  {
2096
2408
  return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
2097
2409
  }
2098
- /*! @ingroup xxh32_family */
2410
+ /*! @ingroup XXH32_family */
2099
2411
  XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
2100
2412
  {
2101
2413
  XXH_free(statePtr);
2102
2414
  return XXH_OK;
2103
2415
  }
2104
2416
 
2105
- /*! @ingroup xxh32_family */
2417
+ /*! @ingroup XXH32_family */
2106
2418
  XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
2107
2419
  {
2108
- memcpy(dstState, srcState, sizeof(*dstState));
2420
+ XXH_memcpy(dstState, srcState, sizeof(*dstState));
2109
2421
  }
2110
2422
 
2111
- /*! @ingroup xxh32_family */
2423
+ /*! @ingroup XXH32_family */
2112
2424
  XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
2113
2425
  {
2114
- XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
2115
- memset(&state, 0, sizeof(state));
2116
- state.v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2117
- state.v2 = seed + XXH_PRIME32_2;
2118
- state.v3 = seed + 0;
2119
- state.v4 = seed - XXH_PRIME32_1;
2120
- /* do not write into reserved, planned to be removed in a future version */
2121
- memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
2426
+ XXH_ASSERT(statePtr != NULL);
2427
+ memset(statePtr, 0, sizeof(*statePtr));
2428
+ statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
2429
+ statePtr->v[1] = seed + XXH_PRIME32_2;
2430
+ statePtr->v[2] = seed + 0;
2431
+ statePtr->v[3] = seed - XXH_PRIME32_1;
2122
2432
  return XXH_OK;
2123
2433
  }
2124
2434
 
2125
2435
 
2126
- /*! @ingroup xxh32_family */
2436
+ /*! @ingroup XXH32_family */
2127
2437
  XXH_PUBLIC_API XXH_errorcode
2128
2438
  XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2129
2439
  {
2130
- if (input==NULL)
2131
- #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
2440
+ if (input==NULL) {
2441
+ XXH_ASSERT(len == 0);
2132
2442
  return XXH_OK;
2133
- #else
2134
- return XXH_ERROR;
2135
- #endif
2443
+ }
2136
2444
 
2137
2445
  { const xxh_u8* p = (const xxh_u8*)input;
2138
2446
  const xxh_u8* const bEnd = p + len;
@@ -2149,10 +2457,10 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2149
2457
  if (state->memsize) { /* some data left from previous update */
2150
2458
  XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
2151
2459
  { const xxh_u32* p32 = state->mem32;
2152
- state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
2153
- state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
2154
- state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
2155
- state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
2460
+ state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
2461
+ state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
2462
+ state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
2463
+ state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
2156
2464
  }
2157
2465
  p += 16-state->memsize;
2158
2466
  state->memsize = 0;
@@ -2160,22 +2468,14 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2160
2468
 
2161
2469
  if (p <= bEnd-16) {
2162
2470
  const xxh_u8* const limit = bEnd - 16;
2163
- xxh_u32 v1 = state->v1;
2164
- xxh_u32 v2 = state->v2;
2165
- xxh_u32 v3 = state->v3;
2166
- xxh_u32 v4 = state->v4;
2167
2471
 
2168
2472
  do {
2169
- v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
2170
- v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
2171
- v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
2172
- v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
2473
+ state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
2474
+ state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
2475
+ state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
2476
+ state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
2173
2477
  } while (p<=limit);
2174
2478
 
2175
- state->v1 = v1;
2176
- state->v2 = v2;
2177
- state->v3 = v3;
2178
- state->v4 = v4;
2179
2479
  }
2180
2480
 
2181
2481
  if (p < bEnd) {
@@ -2188,30 +2488,30 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
2188
2488
  }
2189
2489
 
2190
2490
 
2191
- /*! @ingroup xxh32_family */
2491
+ /*! @ingroup XXH32_family */
2192
2492
  XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
2193
2493
  {
2194
2494
  xxh_u32 h32;
2195
2495
 
2196
2496
  if (state->large_len) {
2197
- h32 = XXH_rotl32(state->v1, 1)
2198
- + XXH_rotl32(state->v2, 7)
2199
- + XXH_rotl32(state->v3, 12)
2200
- + XXH_rotl32(state->v4, 18);
2497
+ h32 = XXH_rotl32(state->v[0], 1)
2498
+ + XXH_rotl32(state->v[1], 7)
2499
+ + XXH_rotl32(state->v[2], 12)
2500
+ + XXH_rotl32(state->v[3], 18);
2201
2501
  } else {
2202
- h32 = state->v3 /* == seed */ + XXH_PRIME32_5;
2502
+ h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
2203
2503
  }
2204
2504
 
2205
2505
  h32 += state->total_len_32;
2206
2506
 
2207
2507
  return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
2208
2508
  }
2209
-
2509
+ #endif /* !XXH_NO_STREAM */
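
Taken together, the functions above form the usual create/reset/update/digest cycle; a minimal usage sketch (the XXH64 streaming API below mirrors it 1:1):

    XXH32_state_t* const st = XXH32_createState();
    if (st != NULL) {
        XXH32_reset(st, 0);               /* seed = 0 */
        XXH32_update(st, "hello ", 6);    /* feed input in chunks */
        XXH32_update(st, "world", 5);
        {   XXH32_hash_t const h = XXH32_digest(st);
            /* h should equal XXH32("hello world", 11, 0) */
            (void)h;
        }
        XXH32_freeState(st);
    }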
2210
2510
 
2211
2511
  /******* Canonical representation *******/
2212
2512
 
2213
2513
  /*!
2214
- * @ingroup xxh32_family
2514
+ * @ingroup XXH32_family
2215
2515
  * The default return values from XXH functions are unsigned 32 and 64 bit
2216
2516
  * integers.
2217
2517
  *
@@ -2228,9 +2528,9 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
2228
2528
  {
2229
2529
  XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
2230
2530
  if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
2231
- memcpy(dst, &hash, sizeof(*dst));
2531
+ XXH_memcpy(dst, &hash, sizeof(*dst));
2232
2532
  }
2233
- /*! @ingroup xxh32_family */
2533
+ /*! @ingroup XXH32_family */
2234
2534
  XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
2235
2535
  {
2236
2536
  return XXH_readBE32(src);
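
A short round-trip sketch of the canonical form, which is what keeps stored hashes portable across endiannesses:

    XXH32_hash_t const h = XXH32("data", 4, 0);
    XXH32_canonical_t canon;
    XXH32_canonicalFromHash(&canon, h);   /* big-endian byte representation */
    {   XXH32_hash_t const back = XXH32_hashFromCanonical(&canon);
        /* back == h on both little- and big-endian hosts */
        (void)back;
    }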
@@ -2271,30 +2571,31 @@ static xxh_u64 XXH_read64(const void* memPtr)
2271
2571
  #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
2272
2572
 
2273
2573
  /*
2274
- * __pack instructions are safer, but compiler specific, hence potentially
2275
- * problematic for some compilers.
2276
- *
2277
- * Currently only defined for GCC and ICC.
2574
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
2575
+ * documentation claimed that it only increased the alignment, but actually it
2576
+ * can decrease it on gcc, clang, and icc:
2577
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
2578
+ * https://gcc.godbolt.org/z/xYez1j67Y.
2278
2579
  */
2279
2580
  #ifdef XXH_OLD_NAMES
2280
2581
  typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
2281
2582
  #endif
2282
2583
  static xxh_u64 XXH_read64(const void* ptr)
2283
2584
  {
2284
- typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
2285
- return ((const xxh_unalign64*)ptr)->u64;
2585
+ typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
2586
+ return *((const xxh_unalign64*)ptr);
2286
2587
  }
2287
2588
 
2288
2589
  #else
2289
2590
 
2290
2591
  /*
2291
2592
  * Portable and safe solution. Generally efficient.
2292
- * see: https://stackoverflow.com/a/32095106/646947
2593
+ * see: http://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
2293
2594
  */
2294
2595
  static xxh_u64 XXH_read64(const void* memPtr)
2295
2596
  {
2296
2597
  xxh_u64 val;
2297
- memcpy(&val, memPtr, sizeof(val));
2598
+ XXH_memcpy(&val, memPtr, sizeof(val));
2298
2599
  return val;
2299
2600
  }
2300
2601
 
@@ -2373,8 +2674,10 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
2373
2674
  /******* xxh64 *******/
2374
2675
  /*!
2375
2676
  * @}
2376
- * @defgroup xxh64_impl XXH64 implementation
2677
+ * @defgroup XXH64_impl XXH64 implementation
2377
2678
  * @ingroup impl
2679
+ *
2680
+ * Details on the XXH64 implementation.
2378
2681
  * @{
2379
2682
  */
2380
2683
  /* #define rather that static const, to be used as initializers */
@@ -2392,6 +2695,7 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
2392
2695
  # define PRIME64_5 XXH_PRIME64_5
2393
2696
  #endif
2394
2697
 
2698
+ /*! @copydoc XXH32_round */
2395
2699
  static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
2396
2700
  {
2397
2701
  acc += input * XXH_PRIME64_2;
@@ -2408,42 +2712,59 @@ static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
2408
2712
  return acc;
2409
2713
  }
2410
2714
 
2411
- static xxh_u64 XXH64_avalanche(xxh_u64 h64)
2715
+ /*! @copydoc XXH32_avalanche */
2716
+ static xxh_u64 XXH64_avalanche(xxh_u64 hash)
2412
2717
  {
2413
- h64 ^= h64 >> 33;
2414
- h64 *= XXH_PRIME64_2;
2415
- h64 ^= h64 >> 29;
2416
- h64 *= XXH_PRIME64_3;
2417
- h64 ^= h64 >> 32;
2418
- return h64;
2718
+ hash ^= hash >> 33;
2719
+ hash *= XXH_PRIME64_2;
2720
+ hash ^= hash >> 29;
2721
+ hash *= XXH_PRIME64_3;
2722
+ hash ^= hash >> 32;
2723
+ return hash;
2419
2724
  }
2420
2725
 
2421
2726
 
2422
2727
  #define XXH_get64bits(p) XXH_readLE64_align(p, align)
2423
2728
 
2424
- static xxh_u64
2425
- XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
2729
+ /*!
2730
+ * @internal
2731
+ * @brief Processes the last 0-31 bytes of @p ptr.
2732
+ *
2733
+ * There may be up to 31 bytes remaining to consume from the input.
2734
+ * This final stage will digest them to ensure that all input bytes are present
2735
+ * in the final mix.
2736
+ *
2737
+ * @param hash The hash to finalize.
2738
+ * @param ptr The pointer to the remaining input.
2739
+ * @param len The remaining length, modulo 32.
2740
+ * @param align Whether @p ptr is aligned.
2741
+ * @return The finalized hash.
2742
+ * @see XXH32_finalize().
2743
+ */
2744
+ static XXH_PUREF xxh_u64
2745
+ XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
2426
2746
  {
2747
+ if (ptr==NULL) XXH_ASSERT(len == 0);
2427
2748
  len &= 31;
2428
2749
  while (len >= 8) {
2429
2750
  xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
2430
2751
  ptr += 8;
2431
- h64 ^= k1;
2432
- h64 = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2752
+ hash ^= k1;
2753
+ hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
2433
2754
  len -= 8;
2434
2755
  }
2435
2756
  if (len >= 4) {
2436
- h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
2757
+ hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
2437
2758
  ptr += 4;
2438
- h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2759
+ hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
2439
2760
  len -= 4;
2440
2761
  }
2441
2762
  while (len > 0) {
2442
- h64 ^= (*ptr++) * XXH_PRIME64_5;
2443
- h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
2763
+ hash ^= (*ptr++) * XXH_PRIME64_5;
2764
+ hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
2444
2765
  --len;
2445
2766
  }
2446
- return XXH64_avalanche(h64);
2767
+ return XXH64_avalanche(hash);
2447
2768
  }
2448
2769
 
2449
2770
  #ifdef XXH_OLD_NAMES
@@ -2456,21 +2777,23 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
2456
2777
  # undef XXH_PROCESS8_64
2457
2778
  #endif
2458
2779
 
2459
- XXH_FORCE_INLINE xxh_u64
2780
+ /*!
2781
+ * @internal
2782
+ * @brief The implementation for @ref XXH64().
2783
+ *
2784
+ * @param input , len , seed Directly passed from @ref XXH64().
2785
+ * @param align Whether @p input is aligned.
2786
+ * @return The calculated hash.
2787
+ */
2788
+ XXH_FORCE_INLINE XXH_PUREF xxh_u64
2460
2789
  XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
2461
2790
  {
2462
- const xxh_u8* bEnd = input ? input + len : NULL;
2463
2791
  xxh_u64 h64;
2464
-
2465
- #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
2466
- if (input==NULL) {
2467
- len=0;
2468
- bEnd=input=(const xxh_u8*)(size_t)32;
2469
- }
2470
- #endif
2792
+ if (input==NULL) XXH_ASSERT(len == 0);
2471
2793
 
2472
2794
  if (len>=32) {
2473
- const xxh_u8* const limit = bEnd - 32;
2795
+ const xxh_u8* const bEnd = input + len;
2796
+ const xxh_u8* const limit = bEnd - 31;
2474
2797
  xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2475
2798
  xxh_u64 v2 = seed + XXH_PRIME64_2;
2476
2799
  xxh_u64 v3 = seed + 0;
@@ -2481,7 +2804,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
2481
2804
  v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
2482
2805
  v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
2483
2806
  v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
2484
- } while (input<=limit);
2807
+ } while (input<limit);
2485
2808
 
2486
2809
  h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2487
2810
  h64 = XXH64_mergeRound(h64, v1);
@@ -2499,10 +2822,10 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
2499
2822
  }
2500
2823
 
2501
2824
 
2502
- /*! @ingroup xxh64_family */
2825
+ /*! @ingroup XXH64_family */
2503
2826
  XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
2504
2827
  {
2505
- #if 0
2828
+ #if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
2506
2829
  /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
2507
2830
  XXH64_state_t state;
2508
2831
  XXH64_reset(&state, seed);
@@ -2520,49 +2843,45 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
2520
2843
  }
2521
2844
 
2522
2845
  /******* Hash Streaming *******/
2523
-
2524
- /*! @ingroup xxh64_family*/
2846
+ #ifndef XXH_NO_STREAM
2847
+ /*! @ingroup XXH64_family*/
2525
2848
  XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
2526
2849
  {
2527
2850
  return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
2528
2851
  }
2529
- /*! @ingroup xxh64_family */
2852
+ /*! @ingroup XXH64_family */
2530
2853
  XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
2531
2854
  {
2532
2855
  XXH_free(statePtr);
2533
2856
  return XXH_OK;
2534
2857
  }
2535
2858
 
2536
- /*! @ingroup xxh64_family */
2859
+ /*! @ingroup XXH64_family */
2537
2860
  XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
2538
2861
  {
2539
- memcpy(dstState, srcState, sizeof(*dstState));
2862
+ XXH_memcpy(dstState, srcState, sizeof(*dstState));
2540
2863
  }
2541
2864
 
2542
- /*! @ingroup xxh64_family */
2865
+ /*! @ingroup XXH64_family */
2543
2866
  XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
2544
2867
  {
2545
- XXH64_state_t state; /* use a local state to memcpy() in order to avoid strict-aliasing warnings */
2546
- memset(&state, 0, sizeof(state));
2547
- state.v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2548
- state.v2 = seed + XXH_PRIME64_2;
2549
- state.v3 = seed + 0;
2550
- state.v4 = seed - XXH_PRIME64_1;
2551
- /* do not write into reserved64, might be removed in a future version */
2552
- memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
2868
+ XXH_ASSERT(statePtr != NULL);
2869
+ memset(statePtr, 0, sizeof(*statePtr));
2870
+ statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
2871
+ statePtr->v[1] = seed + XXH_PRIME64_2;
2872
+ statePtr->v[2] = seed + 0;
2873
+ statePtr->v[3] = seed - XXH_PRIME64_1;
2553
2874
  return XXH_OK;
2554
2875
  }
2555
2876
 
2556
- /*! @ingroup xxh64_family */
2877
+ /*! @ingroup XXH64_family */
2557
2878
  XXH_PUBLIC_API XXH_errorcode
2558
2879
  XXH64_update (XXH64_state_t* state, const void* input, size_t len)
2559
2880
  {
2560
- if (input==NULL)
2561
- #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
2881
+ if (input==NULL) {
2882
+ XXH_ASSERT(len == 0);
2562
2883
  return XXH_OK;
2563
- #else
2564
- return XXH_ERROR;
2565
- #endif
2884
+ }
2566
2885
 
2567
2886
  { const xxh_u8* p = (const xxh_u8*)input;
2568
2887
  const xxh_u8* const bEnd = p + len;
@@ -2577,32 +2896,24 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
2577
2896
 
2578
2897
  if (state->memsize) { /* tmp buffer is full */
2579
2898
  XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
2580
- state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
2581
- state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
2582
- state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
2583
- state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
2899
+ state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
2900
+ state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
2901
+ state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
2902
+ state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
2584
2903
  p += 32 - state->memsize;
2585
2904
  state->memsize = 0;
2586
2905
  }
2587
2906
 
2588
2907
  if (p+32 <= bEnd) {
2589
2908
  const xxh_u8* const limit = bEnd - 32;
2590
- xxh_u64 v1 = state->v1;
2591
- xxh_u64 v2 = state->v2;
2592
- xxh_u64 v3 = state->v3;
2593
- xxh_u64 v4 = state->v4;
2594
2909
 
2595
2910
  do {
2596
- v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
2597
- v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
2598
- v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
2599
- v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
2911
+ state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
2912
+ state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
2913
+ state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
2914
+ state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
2600
2915
  } while (p<=limit);
2601
2916
 
2602
- state->v1 = v1;
2603
- state->v2 = v2;
2604
- state->v3 = v3;
2605
- state->v4 = v4;
2606
2917
  }
2607
2918
 
2608
2919
  if (p < bEnd) {
@@ -2615,43 +2926,38 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
2615
2926
  }
2616
2927
 
2617
2928
 
2618
- /*! @ingroup xxh64_family */
2929
+ /*! @ingroup XXH64_family */
2619
2930
  XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
2620
2931
  {
2621
2932
  xxh_u64 h64;
2622
2933
 
2623
2934
  if (state->total_len >= 32) {
2624
- xxh_u64 const v1 = state->v1;
2625
- xxh_u64 const v2 = state->v2;
2626
- xxh_u64 const v3 = state->v3;
2627
- xxh_u64 const v4 = state->v4;
2628
-
2629
- h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
2630
- h64 = XXH64_mergeRound(h64, v1);
2631
- h64 = XXH64_mergeRound(h64, v2);
2632
- h64 = XXH64_mergeRound(h64, v3);
2633
- h64 = XXH64_mergeRound(h64, v4);
2935
+ h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
2936
+ h64 = XXH64_mergeRound(h64, state->v[0]);
2937
+ h64 = XXH64_mergeRound(h64, state->v[1]);
2938
+ h64 = XXH64_mergeRound(h64, state->v[2]);
2939
+ h64 = XXH64_mergeRound(h64, state->v[3]);
2634
2940
  } else {
2635
- h64 = state->v3 /*seed*/ + XXH_PRIME64_5;
2941
+ h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
2636
2942
  }
2637
2943
 
2638
2944
  h64 += (xxh_u64) state->total_len;
2639
2945
 
2640
2946
  return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
2641
2947
  }
2642
-
2948
+ #endif /* !XXH_NO_STREAM */
2643
2949
 
2644
2950
  /******* Canonical representation *******/
2645
2951
 
2646
- /*! @ingroup xxh64_family */
2952
+ /*! @ingroup XXH64_family */
2647
2953
  XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
2648
2954
  {
2649
2955
  XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
2650
2956
  if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
2651
- memcpy(dst, &hash, sizeof(*dst));
2957
+ XXH_memcpy(dst, &hash, sizeof(*dst));
2652
2958
  }
2653
2959
 
2654
- /*! @ingroup xxh64_family */
2960
+ /*! @ingroup XXH64_family */
2655
2961
  XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
2656
2962
  {
2657
2963
  return XXH_readBE64(src);
@@ -2665,7 +2971,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
2665
2971
  ************************************************************************ */
2666
2972
  /*!
2667
2973
  * @}
2668
- * @defgroup xxh3_impl XXH3 implementation
2974
+ * @defgroup XXH3_impl XXH3 implementation
2669
2975
  * @ingroup impl
2670
2976
  * @{
2671
2977
  */
@@ -2691,17 +2997,21 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
2691
2997
  # define XXH_unlikely(x) (x)
2692
2998
  #endif
2693
2999
 
2694
- #if defined(__GNUC__)
2695
- # if defined(__AVX2__)
2696
- # include <immintrin.h>
2697
- # elif defined(__SSE2__)
2698
- # include <emmintrin.h>
2699
- # elif defined(__ARM_NEON__) || defined(__ARM_NEON)
3000
+ #if defined(__GNUC__) || defined(__clang__)
3001
+ # if defined(__ARM_NEON__) || defined(__ARM_NEON) \
3002
+ || defined(__aarch64__) || defined(_M_ARM) \
3003
+ || defined(_M_ARM64) || defined(_M_ARM64EC)
2700
3004
  # define inline __inline__ /* circumvent a clang bug */
2701
3005
  # include <arm_neon.h>
2702
3006
  # undef inline
3007
+ # elif defined(__AVX2__)
3008
+ # include <immintrin.h>
3009
+ # elif defined(__SSE2__)
3010
+ # include <emmintrin.h>
2703
3011
  # endif
2704
- #elif defined(_MSC_VER)
3012
+ #endif
3013
+
3014
+ #if defined(_MSC_VER)
2705
3015
  # include <intrin.h>
2706
3016
  #endif
2707
3017
 
@@ -2839,17 +3149,20 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
2839
3149
  #endif
2840
3150
 
2841
3151
  #ifndef XXH_VECTOR /* can be defined on command line */
2842
- # if defined(__AVX512F__)
3152
+ # if ( \
3153
+ defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
3154
+ || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
3155
+ ) && ( \
3156
+ defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
3157
+ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
3158
+ )
3159
+ # define XXH_VECTOR XXH_NEON
3160
+ # elif defined(__AVX512F__)
2843
3161
  # define XXH_VECTOR XXH_AVX512
2844
3162
  # elif defined(__AVX2__)
2845
3163
  # define XXH_VECTOR XXH_AVX2
2846
3164
  # elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
2847
3165
  # define XXH_VECTOR XXH_SSE2
2848
- # elif defined(__GNUC__) /* msvc support maybe later */ \
2849
- && (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
2850
- && (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
2851
- || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
2852
- # define XXH_VECTOR XXH_NEON
2853
3166
  # elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
2854
3167
  || (defined(__s390x__) && defined(__VEC__)) \
2855
3168
  && defined(__GNUC__) /* TODO: IBM XL */
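
Since XXH_VECTOR "can be defined on command line", a build may pin a code path instead of relying on this detection; assuming upstream's documented values (0 == XXH_SCALAR in the fake enum above):

    /* hypothetical: cc -DXXH_VECTOR=0 ..., or equivalently: */
    #define XXH_VECTOR 0   /* XXH_SCALAR */
    #include "xxhash.h"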
@@ -2911,7 +3224,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
2911
3224
  */
2912
3225
  #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
2913
3226
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
2914
- && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
3227
+ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
2915
3228
  # pragma GCC push_options
2916
3229
  # pragma GCC optimize("-O2")
2917
3230
  #endif
@@ -2999,8 +3312,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
2999
3312
  * }
3000
3313
  */
3001
3314
  # if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
3002
- && defined(__GNUC__) \
3003
- && !defined(__aarch64__) && !defined(__arm64__)
3315
+ && (defined(__GNUC__) || defined(__clang__)) \
3316
+ && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
3004
3317
  # define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
3005
3318
  do { \
3006
3319
  /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
@@ -3017,6 +3330,76 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
3017
3330
  (outHi) = vshrn_n_u64 ((in), 32); \
3018
3331
  } while (0)
3019
3332
  # endif
3333
+
3334
+ /*!
3335
+ * @internal
3336
+ * @brief `vld1q_u64` but faster and alignment-safe.
3337
+ *
3338
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
3339
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
3340
+ *
3341
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
3342
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
3343
+ *
3344
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
3345
+ * unaligned load.
3346
+ */
3347
+ #if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
3348
+ XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
3349
+ {
3350
+ return *(uint64x2_t const*)ptr;
3351
+ }
3352
+ #else
3353
+ XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
3354
+ {
3355
+ return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
3356
+ }
3357
+ #endif
3358
+ /*!
3359
+ * @ingroup tuning
3360
+ * @brief Controls the NEON to scalar ratio for XXH3
3361
+ *
3362
+ * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
3363
+ * 2 lanes on scalar by default.
3364
+ *
3365
+ * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
3366
+ * emulated 64-bit arithmetic is too slow.
3367
+ *
3368
+ * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
3369
+ *
3370
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
3371
+ * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
3372
+ * you are only using 2/3 of the CPU bandwidth.
3373
+ *
3374
+ * This is even more noticeable on the more advanced cores like the A76 which
3375
+ * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
3376
+ *
3377
+ * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
3378
+ * remaining lanes will use scalar instructions. This improves the bandwidth
3379
+ * and also gives the integer pipelines something to do besides twiddling loop
3380
+ * counters and pointers.
3381
+ *
3382
+ * This change benefits CPUs with large micro-op buffers without negatively affecting
3383
+ * other CPUs:
3384
+ *
3385
+ * | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
3386
+ * |:----------------------|:--------------------|----------:|-----------:|------:|
3387
+ * | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
3388
+ * | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
3389
+ * | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
3390
+ *
3391
+ * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
3392
+ *
3393
+ * @see XXH3_accumulate_512_neon()
3394
+ */
3395
+ # ifndef XXH3_NEON_LANES
3396
+ # if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
3397
+ && XXH_SIZE_OPT <= 0
3398
+ # define XXH3_NEON_LANES 6
3399
+ # else
3400
+ # define XXH3_NEON_LANES XXH_ACC_NB
3401
+ # endif
3402
+ # endif
3020
3403
  #endif /* XXH_VECTOR == XXH_NEON */
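
Following the dispatch table above, a consumer tuning for a specific core could pick a different NEON/scalar split; a hedged sketch:

    /* hypothetical: 4 NEON lanes + 4 scalar lanes (valid values: 2, 4, 6, 8) */
    #define XXH3_NEON_LANES 4
    #include "xxhash.h"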
3021
3404
 
3022
3405
  /*
@@ -3028,23 +3411,33 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
3028
3411
  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
3029
3412
  */
3030
3413
  #if XXH_VECTOR == XXH_VSX
3414
+ /* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
3415
+ * and `pixel`. This is a problem for obvious reasons.
3416
+ *
3417
+ * These keywords are unnecessary; the spec literally says they are
3418
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
3419
+ * after including the header.
3420
+ *
3421
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
3422
+ # pragma push_macro("bool")
3423
+ # pragma push_macro("vector")
3424
+ # pragma push_macro("pixel")
3425
+ /* silence potential macro redefined warnings */
3426
+ # undef bool
3427
+ # undef vector
3428
+ # undef pixel
3429
+
3031
3430
  # if defined(__s390x__)
3032
3431
  # include <s390intrin.h>
3033
3432
  # else
3034
- /* gcc's altivec.h can have the unwanted consequence to unconditionally
3035
- * #define bool, vector, and pixel keywords,
3036
- * with bad consequences for programs already using these keywords for other purposes.
3037
- * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
3038
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
3039
- * but it seems that, in some cases, it isn't.
3040
- * Force the build macro to be defined, so that keywords are not altered.
3041
- */
3042
- # if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
3043
- # define __APPLE_ALTIVEC__
3044
- # endif
3045
3433
  # include <altivec.h>
3046
3434
  # endif
3047
3435
 
3436
+ /* Restore the original macro values, if applicable. */
3437
+ # pragma pop_macro("pixel")
3438
+ # pragma pop_macro("vector")
3439
+ # pragma pop_macro("bool")
3440
+
3048
3441
  typedef __vector unsigned long long xxh_u64x2;
3049
3442
  typedef __vector unsigned char xxh_u8x16;
3050
3443
  typedef __vector unsigned xxh_u32x4;
@@ -3083,7 +3476,7 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
3083
3476
  XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
3084
3477
  {
3085
3478
  xxh_u64x2 ret;
3086
- memcpy(&ret, ptr, sizeof(xxh_u64x2));
3479
+ XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
3087
3480
  # if XXH_VSX_BE
3088
3481
  ret = XXH_vec_revb(ret);
3089
3482
  # endif
@@ -3128,7 +3521,9 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
3128
3521
  #if defined(XXH_NO_PREFETCH)
3129
3522
  # define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
3130
3523
  #else
3131
- # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
3524
+ # if XXH_SIZE_OPT >= 1
3525
+ # define XXH_PREFETCH(ptr) (void)(ptr)
3526
+ # elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
3132
3527
  # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
3133
3528
  # define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
3134
3529
  # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
@@ -3193,7 +3588,6 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
3193
3588
  return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
3194
3589
  }
3195
3590
  #elif defined(_MSC_VER) && defined(_M_IX86)
3196
- # include <intrin.h>
3197
3591
  # define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
3198
3592
  #else
3199
3593
  /*
@@ -3212,7 +3606,7 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
3212
3606
  * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
3213
3607
  * version.
3214
3608
  *
3215
- * @param lhs, rhs The 64-bit integers to be multiplied
3609
+ * @param lhs , rhs The 64-bit integers to be multiplied
3216
3610
  * @return The 128-bit result represented in an @ref XXH128_hash_t.
3217
3611
  */
3218
3612
  static XXH128_hash_t
@@ -3233,7 +3627,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3233
3627
  * In that case it is best to use the portable one.
3234
3628
  * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
3235
3629
  */
3236
- #if defined(__GNUC__) && !defined(__wasm__) \
3630
+ #if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
3237
3631
  && defined(__SIZEOF_INT128__) \
3238
3632
  || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
3239
3633
 
@@ -3250,7 +3644,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3250
3644
  *
3251
3645
  * This compiles to single operand MUL on x64.
3252
3646
  */
3253
- #elif defined(_M_X64) || defined(_M_IA64)
3647
+ #elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
3254
3648
 
3255
3649
  #ifndef _MSC_VER
3256
3650
  # pragma intrinsic(_umul128)
@@ -3262,6 +3656,21 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3262
3656
  r128.high64 = product_high;
3263
3657
  return r128;
3264
3658
 
3659
+ /*
3660
+ * MSVC for ARM64's __umulh method.
3661
+ *
3662
+ * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
3663
+ */
3664
+ #elif defined(_M_ARM64) || defined(_M_ARM64EC)
3665
+
3666
+ #ifndef _MSC_VER
3667
+ # pragma intrinsic(__umulh)
3668
+ #endif
3669
+ XXH128_hash_t r128;
3670
+ r128.low64 = lhs * rhs;
3671
+ r128.high64 = __umulh(lhs, rhs);
3672
+ return r128;
3673
+
3265
3674
  #else
3266
3675
  /*
3267
3676
  * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
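
For reference, the portable scalar path splits each operand into 32-bit halves and sums the four partial products, along these lines (a sketch consistent with that approach; the helper name is hypothetical):

    static XXH128_hash_t XXH_mult64to128_scalar(xxh_u64 lhs, xxh_u64 rhs)
    {
        /* four 32x32->64 partial products */
        xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
        xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
        xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
        xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);
        /* fold the two middle terms, carrying out of bit 63 into the high word */
        xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
        xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
        xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
        XXH128_hash_t r128;
        r128.low64  = lower;
        r128.high64 = upper;
        return r128;
    }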
@@ -3330,7 +3739,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
3330
3739
  * The reason for the separate function is to prevent passing too many structs
3331
3740
  * around by value. This will hopefully inline the multiply, but we don't force it.
3332
3741
  *
3333
- * @param lhs, rhs The 64-bit integers to multiply
3742
+ * @param lhs , rhs The 64-bit integers to multiply
3334
3743
  * @return The low 64 bits of the product XOR'd by the high 64 bits.
3335
3744
  * @see XXH_mult64to128()
3336
3745
  */
@@ -3342,7 +3751,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
3342
3751
  }
3343
3752
 
3344
3753
  /*! Seems to produce slightly better code on GCC for some reason. */
3345
- XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
3754
+ XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
3346
3755
  {
3347
3756
  XXH_ASSERT(0 <= shift && shift < 64);
3348
3757
  return v64 ^ (v64 >> shift);
@@ -3409,7 +3818,7 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
3409
3818
  *
3410
3819
  * This adds an extra layer of strength for custom secrets.
3411
3820
  */
3412
- XXH_FORCE_INLINE XXH64_hash_t
3821
+ XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3413
3822
  XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3414
3823
  {
3415
3824
  XXH_ASSERT(input != NULL);
@@ -3431,7 +3840,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
3431
3840
  }
3432
3841
  }
3433
3842
 
3434
- XXH_FORCE_INLINE XXH64_hash_t
3843
+ XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3435
3844
  XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3436
3845
  {
3437
3846
  XXH_ASSERT(input != NULL);
@@ -3447,7 +3856,7 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
3447
3856
  }
3448
3857
  }
3449
3858
 
3450
- XXH_FORCE_INLINE XXH64_hash_t
3859
+ XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3451
3860
  XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3452
3861
  {
3453
3862
  XXH_ASSERT(input != NULL);
@@ -3464,7 +3873,7 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
3464
3873
  }
3465
3874
  }
3466
3875
 
3467
- XXH_FORCE_INLINE XXH64_hash_t
3876
+ XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3468
3877
  XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
3469
3878
  {
3470
3879
  XXH_ASSERT(len <= 16);
@@ -3534,7 +3943,7 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
3534
3943
  }
3535
3944
 
3536
3945
  /* For mid range keys, XXH3 uses a Mum-hash variant. */
3537
- XXH_FORCE_INLINE XXH64_hash_t
3946
+ XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
3538
3947
  XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
3539
3948
  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
3540
3949
  XXH64_hash_t seed)
@@ -3543,6 +3952,14 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
3543
3952
  XXH_ASSERT(16 < len && len <= 128);
3544
3953
 
3545
3954
  { xxh_u64 acc = len * XXH_PRIME64_1;
3955
+ #if XXH_SIZE_OPT >= 1
3956
+ /* Smaller and cleaner, but slightly slower. */
3957
+ size_t i = (len - 1) / 32;
3958
+ do {
3959
+ acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
3960
+ acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
3961
+ } while (i-- != 0);
3962
+ #else
3546
3963
  if (len > 32) {
3547
3964
  if (len > 64) {
3548
3965
  if (len > 96) {
@@ -3557,14 +3974,14 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
3557
3974
  }
3558
3975
  acc += XXH3_mix16B(input+0, secret+0, seed);
3559
3976
  acc += XXH3_mix16B(input+len-16, secret+16, seed);
3560
-
3977
+ #endif
3561
3978
  return XXH3_avalanche(acc);
3562
3979
  }
3563
3980
  }
3564
3981
 
3565
3982
  #define XXH3_MIDSIZE_MAX 240
3566
3983
 
3567
- XXH_NO_INLINE XXH64_hash_t
3984
+ XXH_NO_INLINE XXH_PUREF XXH64_hash_t
3568
3985
  XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
3569
3986
  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
3570
3987
  XXH64_hash_t seed)
@@ -3632,7 +4049,7 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
  XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
  {
  if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
- memcpy(dst, &v64, sizeof(v64));
+ XXH_memcpy(dst, &v64, sizeof(v64));
  }

  /* Several intrinsic functions below are supposed to accept __int64 as argument,
@@ -3649,6 +4066,7 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
  typedef long long xxh_i64;
  #endif

+
  /*
  * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
  *
@@ -3684,7 +4102,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
  const void* XXH_RESTRICT input,
  const void* XXH_RESTRICT secret)
  {
- XXH_ALIGN(64) __m512i* const xacc = (__m512i *) acc;
+ __m512i* const xacc = (__m512i *) acc;
  XXH_ASSERT((((size_t)acc) & 63) == 0);
  XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));

@@ -3733,7 +4151,7 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
  {
  XXH_ASSERT((((size_t)acc) & 63) == 0);
  XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
- { XXH_ALIGN(64) __m512i* const xacc = (__m512i*) acc;
+ { __m512i* const xacc = (__m512i*) acc;
  const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);

  /* xacc[0] ^= (xacc[0] >> 47) */
@@ -3794,7 +4212,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
  const void* XXH_RESTRICT secret)
  {
  XXH_ASSERT((((size_t)acc) & 31) == 0);
- { XXH_ALIGN(32) __m256i* const xacc = (__m256i *) acc;
+ { __m256i* const xacc = (__m256i *) acc;
  /* Unaligned. This is mainly for pointer arithmetic, and because
  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
  const __m256i* const xinput = (const __m256i *) input;
@@ -3826,7 +4244,7 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void
  XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
  {
  XXH_ASSERT((((size_t)acc) & 31) == 0);
- { XXH_ALIGN(32) __m256i* const xacc = (__m256i*) acc;
+ { __m256i* const xacc = (__m256i*) acc;
  /* Unaligned. This is mainly for pointer arithmetic, and because
  * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
  const __m256i* const xsecret = (const __m256i *) secret;
@@ -3900,7 +4318,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
  {
  /* SSE2 is just a half-scale version of the AVX2 version. */
  XXH_ASSERT((((size_t)acc) & 15) == 0);
- { XXH_ALIGN(16) __m128i* const xacc = (__m128i *) acc;
+ { __m128i* const xacc = (__m128i *) acc;
  /* Unaligned. This is mainly for pointer arithmetic, and because
  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
  const __m128i* const xinput = (const __m128i *) input;
@@ -3932,7 +4350,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void
  XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
  {
  XXH_ASSERT((((size_t)acc) & 15) == 0);
- { XXH_ALIGN(16) __m128i* const xacc = (__m128i*) acc;
+ { __m128i* const xacc = (__m128i*) acc;
  /* Unaligned. This is mainly for pointer arithmetic, and because
  * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
  const __m128i* const xsecret = (const __m128i *) secret;
@@ -3994,40 +4412,66 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR

  #if (XXH_VECTOR == XXH_NEON)

+ /* forward declarations for the scalar routines */
+ XXH_FORCE_INLINE void
+ XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
+ void const* XXH_RESTRICT secret, size_t lane);
+
+ XXH_FORCE_INLINE void
+ XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+ void const* XXH_RESTRICT secret, size_t lane);
+
+ /*!
+ * @internal
+ * @brief The bulk processing loop for NEON.
+ *
+ * The NEON code path is actually partially scalar when running on AArch64. This
+ * is to optimize the pipelining and can have up to 15% speedup depending on the
+ * CPU, and it also mitigates some GCC codegen issues.
+ *
+ * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ */
  XXH_FORCE_INLINE void
  XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
  const void* XXH_RESTRICT input,
  const void* XXH_RESTRICT secret)
  {
  XXH_ASSERT((((size_t)acc) & 15) == 0);
+ XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
  {
- XXH_ALIGN(16) uint64x2_t* const xacc = (uint64x2_t *) acc;
+ uint64x2_t* const xacc = (uint64x2_t *) acc;
  /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
  uint8_t const* const xinput = (const uint8_t *) input;
  uint8_t const* const xsecret = (const uint8_t *) secret;

  size_t i;
- for (i=0; i < XXH_STRIPE_LEN / sizeof(uint64x2_t); i++) {
+ /* AArch64 uses both scalar and neon at the same time */
+ for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+ XXH3_scalarRound(acc, input, secret, i);
+ }
+ for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+ uint64x2_t acc_vec = xacc[i];
  /* data_vec = xinput[i]; */
- uint8x16_t data_vec = vld1q_u8(xinput + (i * 16));
+ uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
  /* key_vec = xsecret[i]; */
- uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
+ uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
  uint64x2_t data_key;
  uint32x2_t data_key_lo, data_key_hi;
- /* xacc[i] += swap(data_vec); */
- uint64x2_t const data64 = vreinterpretq_u64_u8(data_vec);
- uint64x2_t const swapped = vextq_u64(data64, data64, 1);
- xacc[i] = vaddq_u64 (xacc[i], swapped);
+ /* acc_vec_2 = swap(data_vec) */
+ uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1);
  /* data_key = data_vec ^ key_vec; */
- data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
+ data_key = veorq_u64(data_vec, key_vec);
  /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
  * data_key_hi = (uint32x2_t) (data_key >> 32);
  * data_key = UNDEFINED; */
  XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
- /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
- xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
-
+ /* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
+ acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi);
+ /* xacc[i] += acc_vec_2; */
+ acc_vec = vaddq_u64 (acc_vec, acc_vec_2);
+ xacc[i] = acc_vec;
  }
+
  }
  }
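The doc comment above describes a hybrid kernel: part of the accumulator array is handled by NEON vectors, the rest by the scalar round. A scalar-only model of that partitioning (stand-in constants, no intrinsics; XXH3_NEON_LANES = 6 is only an assumed configuration here):

#include <stdio.h>

#define ACC_NB     8   /* stands in for XXH_ACC_NB */
#define NEON_LANES 6   /* stands in for XXH3_NEON_LANES; must be even */

int main(void)
{
    int i;
    /* trailing lanes take the scalar round, as in XXH3_accumulate_512_neon */
    for (i = NEON_LANES; i < ACC_NB; i++)
        printf("lane %d: scalar round\n", i);
    /* leading lanes are processed two at a time as uint64x2_t vectors */
    for (i = 0; i < NEON_LANES / 2; i++)
        printf("lanes %d-%d: one NEON round\n", 2*i, 2*i + 1);
    return 0;
}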

@@ -4041,15 +4485,19 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
  uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);

  size_t i;
- for (i=0; i < XXH_STRIPE_LEN/sizeof(uint64x2_t); i++) {
+ /* AArch64 uses both scalar and neon at the same time */
+ for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+ XXH3_scalarScrambleRound(acc, secret, i);
+ }
+ for (i=0; i < XXH3_NEON_LANES / 2; i++) {
  /* xacc[i] ^= (xacc[i] >> 47); */
  uint64x2_t acc_vec = xacc[i];
- uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
- uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
+ uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
+ uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);

  /* xacc[i] ^= xsecret[i]; */
- uint8x16_t key_vec = vld1q_u8(xsecret + (i * 16));
- uint64x2_t data_key = veorq_u64(data_vec, vreinterpretq_u64_u8(key_vec));
+ uint64x2_t key_vec = XXH_vld1q_u64 (xsecret + (i * 16));
+ uint64x2_t data_key = veorq_u64 (data_vec, key_vec);

  /* xacc[i] *= XXH_PRIME32_1 */
  uint32x2_t data_key_lo, data_key_hi;
@@ -4077,11 +4525,12 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
  */
  uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
  /* xacc[i] = prod_hi << 32; */
- xacc[i] = vshlq_n_u64(prod_hi, 32);
+ prod_hi = vshlq_n_u64(prod_hi, 32);
  /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
- xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
+ xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
  }
- } }
+ }
+ }
  }

  #endif
@@ -4093,7 +4542,8 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
  const void* XXH_RESTRICT input,
  const void* XXH_RESTRICT secret)
  {
- xxh_u64x2* const xacc = (xxh_u64x2*) acc; /* presumed aligned */
+ /* presumed aligned */
+ unsigned int* const xacc = (unsigned int*) acc;
  xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */
  xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */
  xxh_u64x2 const v32 = { 32, 32 };
@@ -4108,14 +4558,18 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
  xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
  /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
  xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
- xacc[i] += product;
+ /* acc_vec = xacc[i]; */
+ xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
+ acc_vec += product;

  /* swap high and low halves */
  #ifdef __s390x__
- xacc[i] += vec_permi(data_vec, data_vec, 2);
+ acc_vec += vec_permi(data_vec, data_vec, 2);
  #else
- xacc[i] += vec_xxpermdi(data_vec, data_vec, 2);
+ acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
  #endif
+ /* xacc[i] = acc_vec; */
+ vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
  }
  }

@@ -4153,38 +4607,90 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)

  /* scalar variants - universal */

+ /*!
+ * @internal
+ * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
+ XXH_FORCE_INLINE void
+ XXH3_scalarRound(void* XXH_RESTRICT acc,
+ void const* XXH_RESTRICT input,
+ void const* XXH_RESTRICT secret,
+ size_t lane)
+ {
+ xxh_u64* xacc = (xxh_u64*) acc;
+ xxh_u8 const* xinput = (xxh_u8 const*) input;
+ xxh_u8 const* xsecret = (xxh_u8 const*) secret;
+ XXH_ASSERT(lane < XXH_ACC_NB);
+ XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+ {
+ xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
+ xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
+ xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
+ xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+ }
+ }
+
+ /*!
+ * @internal
+ * @brief Processes a 64 byte block of data using the scalar path.
+ */
  XXH_FORCE_INLINE void
  XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
  const void* XXH_RESTRICT input,
  const void* XXH_RESTRICT secret)
  {
- XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
- const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */
- const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
  size_t i;
- XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
+ /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+ #if defined(__GNUC__) && !defined(__clang__) \
+ && (defined(__arm__) || defined(__thumb2__)) \
+ && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+ && XXH_SIZE_OPT <= 0
+ # pragma GCC unroll 8
+ #endif
  for (i=0; i < XXH_ACC_NB; i++) {
- xxh_u64 const data_val = XXH_readLE64(xinput + 8*i);
- xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
- xacc[i ^ 1] += data_val; /* swap adjacent lanes */
- xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+ XXH3_scalarRound(acc, input, secret, i);
  }
  }

+ /*!
+ * @internal
+ * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
+ *
+ * This is extracted to its own function because the NEON path uses a combination
+ * of NEON and scalar.
+ */
  XXH_FORCE_INLINE void
- XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+ XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
+ void const* XXH_RESTRICT secret,
+ size_t lane)
  {
- XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
+ xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
  const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
- size_t i;
  XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
- for (i=0; i < XXH_ACC_NB; i++) {
- xxh_u64 const key64 = XXH_readLE64(xsecret + 8*i);
- xxh_u64 acc64 = xacc[i];
+ XXH_ASSERT(lane < XXH_ACC_NB);
+ {
+ xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
+ xxh_u64 acc64 = xacc[lane];
  acc64 = XXH_xorshift64(acc64, 47);
  acc64 ^= key64;
  acc64 *= XXH_PRIME32_1;
- xacc[i] = acc64;
+ xacc[lane] = acc64;
+ }
+ }
+
+ /*!
+ * @internal
+ * @brief Scrambles the accumulators after a large chunk has been read
+ */
+ XXH_FORCE_INLINE void
+ XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
+ {
+ size_t i;
+ for (i=0; i < XXH_ACC_NB; i++) {
+ XXH3_scalarScrambleRound(acc, secret, i);
  }
  }
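In XXH3_scalarRound above, `xacc[lane ^ 1] += data_val` is the scalar twin of the NEON vextq_u64 swap: XOR-ing the index with 1 pairs lanes 0 and 1, 2 and 3, and so on. A tiny, illustrative-only demonstration of that indexing:

#include <stdio.h>
#include <stddef.h>

int main(void)
{
    size_t lane;
    for (lane = 0; lane < 8; lane++)
        printf("data read for lane %zu is accumulated into acc[%zu]\n",
               lane, lane ^ 1);
    return 0;
}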

@@ -4206,8 +4712,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
  * placed sequentially, in order, at the top of the unrolled loop.
  *
  * While MOVK is great for generating constants (2 cycles for a 64-bit
- * constant compared to 4 cycles for LDR), long MOVK chains stall the
- * integer pipelines:
+ * constant compared to 4 cycles for LDR), it fights for bandwidth with
+ * the arithmetic instructions.
+ *
  * I L S
  * MOVK
  * MOVK
@@ -4224,6 +4731,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
  * ADD LDR
  * SUB STR
  * STR
+ *
+ * See XXH3_NEON_LANES for details on the pipeline.
+ *
  * XXH3_64bits_withSeed, len == 256, Snapdragon 835
  * without hack: 2654.4 MB/s
  * with hack: 3202.9 MB/s
@@ -4296,7 +4806,10 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);

  #endif

-
+ #if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
+ # undef XXH3_initCustomSecret
+ # define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+ #endif

  #ifndef XXH_PREFETCH_DIST
  # ifdef __clang__
@@ -4422,9 +4935,11 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
  }

  /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance to transmit the secret's size (when it's static)
+ * so that the compiler can properly optimize the vectorized loop.
+ * This makes a big performance difference for "medium" keys (<1 KB) when using the AVX instruction set.
  */
- XXH_NO_INLINE XXH64_hash_t
+ XXH_FORCE_INLINE XXH64_hash_t
  XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
  XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
  {
@@ -4433,13 +4948,12 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
  }

  /*
- * It's important for performance that XXH3_hashLong is not inlined.
- * Since the function is not inlined, the compiler may not be able to understand that,
- * in some scenarios, its `secret` argument is actually a compile time constant.
- * This variant enforces that the compiler can detect that,
- * and uses this opportunity to streamline the generated code for better performance.
+ * It's preferable for performance that XXH3_hashLong is not inlined,
+ * as it results in a smaller function for small data, easier on the instruction cache.
+ * Note that inside this no_inline function, we do inline the internal loop,
+ * and provide a statically defined secret size to allow optimization of the vector loop.
  */
- XXH_NO_INLINE XXH64_hash_t
+ XXH_NO_INLINE XXH_PUREF XXH64_hash_t
  XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
  XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
  {
@@ -4465,10 +4979,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
  XXH3_f_scrambleAcc f_scramble,
  XXH3_f_initCustomSecret f_initSec)
  {
+ #if XXH_SIZE_OPT <= 0
  if (seed == 0)
  return XXH3_hashLong_64b_internal(input, len,
  XXH3_kSecret, sizeof(XXH3_kSecret),
  f_acc512, f_scramble);
+ #endif
  { XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
  f_initSec(secret, seed);
  return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
@@ -4517,29 +5033,37 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,

  /* === Public entry point === */

- /*! @ingroup xxh3_family */
- XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+ /*! @ingroup XXH3_family */
+ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t length)
  {
- return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+ return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH64_hash_t
- XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+ XXH3_64bits_withSecret(const void* input, size_t length, const void* secret, size_t secretSize)
  {
- return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+ return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH64_hash_t
- XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+ XXH3_64bits_withSeed(const void* input, size_t length, XXH64_hash_t seed)
  {
- return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+ return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
  }

+ XXH_PUBLIC_API XXH64_hash_t
+ XXH3_64bits_withSecretandSeed(const void* input, size_t length, const void* secret, size_t secretSize, XXH64_hash_t seed)
+ {
+ if (length <= XXH3_MIDSIZE_MAX)
+ return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+ return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
+ }
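A minimal caller-side sketch of the new one-shot entry point added above (not from the package). It assumes xxhash.h is on the include path; in this version the *_withSecretandSeed declarations may only be visible when XXH_STATIC_LINKING_ONLY is defined:

#define XXH_STATIC_LINKING_ONLY   /* assumption: may be needed for this API here */
#include "xxhash.h"
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned char secret[XXH3_SECRET_SIZE_MIN];
    const char* msg = "hello, xxh3";
    XXH64_hash_t h;

    memset(secret, 0x5A, sizeof(secret));  /* toy secret; use real entropy in practice */
    h = XXH3_64bits_withSecretandSeed(msg, strlen(msg),
                                      secret, sizeof(secret), /*seed*/ 42);
    printf("%016llx\n", (unsigned long long)h);
    return 0;
}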

- /* === XXH3 streaming === */

+ /* === XXH3 streaming === */
+ #ifndef XXH_NO_STREAM
  /*
  * Mallocs a pointer that is always aligned to align.
  *
@@ -4563,7 +5087,7 @@ XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
  *
  * Align must be a power of 2 and 8 <= align <= 128.
  */
- static void* XXH_alignedMalloc(size_t s, size_t align)
+ static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
  {
  XXH_ASSERT(align <= 128 && align >= 8); /* range check */
  XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
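The hunk above only touches the declaration, so as background here is a generic sketch of the usual aligned-malloc technique: over-allocate, round the address up, and stash the real base pointer just below the aligned block so it can be freed later. This illustrates the pattern, not necessarily the exact body of XXH_alignedMalloc:

#include <stdlib.h>
#include <stdint.h>

static void* aligned_malloc(size_t s, size_t align)  /* align: power of 2, >= sizeof(void*) */
{
    unsigned char* const base = (unsigned char*)malloc(s + align + sizeof(void*));
    if (base == NULL) return NULL;
    {   uintptr_t const addr = (uintptr_t)base + sizeof(void*);
        unsigned char* const ptr =
            (unsigned char*)((addr + align - 1) & ~(uintptr_t)(align - 1));
        ((void**)ptr)[-1] = base;   /* remember the real allocation */
        return ptr;
    }
}

static void aligned_free(void* p)
{
    if (p != NULL) free(((void**)p)[-1]);
}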
@@ -4605,7 +5129,7 @@ static void XXH_alignedFree(void* p)
  XXH_free(base);
  }
  }
- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
  {
  XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
@@ -4614,24 +5138,24 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
  return state;
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
  {
  XXH_alignedFree(statePtr);
  return XXH_OK;
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API void
  XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
  {
- memcpy(dst_state, src_state, sizeof(*dst_state));
+ XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
  }

  static void
  XXH3_reset_internal(XXH3_state_t* statePtr,
- XXH64_hash_t seed,
- const void* secret, size_t secretSize)
+ XXH64_hash_t seed,
+ const void* secret, size_t secretSize)
  {
  size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
  size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
@@ -4648,13 +5172,14 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
  statePtr->acc[6] = XXH_PRIME64_5;
  statePtr->acc[7] = XXH_PRIME32_1;
  statePtr->seed = seed;
+ statePtr->useSeed = (seed != 0);
  statePtr->extSecret = (const unsigned char*)secret;
  XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
  statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
  statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_64bits_reset(XXH3_state_t* statePtr)
  {
@@ -4663,7 +5188,7 @@ XXH3_64bits_reset(XXH3_state_t* statePtr)
  return XXH_OK;
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
  {
@@ -4674,17 +5199,30 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t
  return XXH_OK;
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
  {
  if (statePtr == NULL) return XXH_ERROR;
  if (seed==0) return XXH3_64bits_reset(statePtr);
- if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
+ if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
+ XXH3_initCustomSecret(statePtr->customSecret, seed);
  XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
  return XXH_OK;
  }

+ /*! @ingroup XXH3_family */
+ XXH_PUBLIC_API XXH_errorcode
+ XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+ {
+ if (statePtr == NULL) return XXH_ERROR;
+ if (secret == NULL) return XXH_ERROR;
+ if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+ XXH3_reset_internal(statePtr, seed64, secret, secretSize);
+ statePtr->useSeed = 1; /* always, even if seed64==0 */
+ return XXH_OK;
+ }
+
  /* Note : when XXH3_consumeStripes() is invoked,
  * there must be a guarantee that at least one more byte will be consumed from input
  * so that the function can blindly consume all stripes using the "normal" secret segment */
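A streaming usage sketch for the reset variant added above, pairing a caller-supplied secret with a seed (not from the package; assumes xxhash.h is visible, possibly behind XXH_STATIC_LINKING_ONLY in this version):

#define XXH_STATIC_LINKING_ONLY   /* assumption: may be needed for this API here */
#include "xxhash.h"
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned char secret[XXH3_SECRET_SIZE_MIN];
    XXH3_state_t* const st = XXH3_createState();
    XXH64_hash_t h;
    if (st == NULL) return 1;

    memset(secret, 0xC3, sizeof(secret));  /* toy secret */
    if (XXH3_64bits_reset_withSecretandSeed(st, secret, sizeof(secret), 7) != XXH_OK)
        return 1;
    XXH3_64bits_update(st, "chunk one ", 10);
    XXH3_64bits_update(st, "chunk two", 9);
    h = XXH3_64bits_digest(st);
    XXH3_freeState(st);
    printf("%016llx\n", (unsigned long long)h);
    return 0;
}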
@@ -4712,35 +5250,48 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
  }
  }

+ #ifndef XXH3_STREAM_USE_STACK
+ # if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
+ # define XXH3_STREAM_USE_STACK 1
+ # endif
+ #endif
  /*
  * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
  */
  XXH_FORCE_INLINE XXH_errorcode
- XXH3_update(XXH3_state_t* state,
- const xxh_u8* input, size_t len,
+ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
+ const xxh_u8* XXH_RESTRICT input, size_t len,
  XXH3_f_accumulate_512 f_acc512,
  XXH3_f_scrambleAcc f_scramble)
  {
- if (input==NULL)
- #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (input==NULL) {
+ XXH_ASSERT(len == 0);
  return XXH_OK;
- #else
- return XXH_ERROR;
- #endif
+ }

+ XXH_ASSERT(state != NULL);
  { const xxh_u8* const bEnd = input + len;
  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-
+ #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+ /* For some reason, gcc and MSVC seem to suffer greatly
+ * when operating on accumulators directly in state.
+ * Operating into stack space seems to enable proper optimization.
+ * clang, on the other hand, doesn't seem to need this trick */
+ XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+ #else
+ xxh_u64* XXH_RESTRICT const acc = state->acc;
+ #endif
  state->totalLen += len;
  XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);

- if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) { /* fill in tmp buffer */
+ /* small input : just fill in tmp buffer */
+ if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
  XXH_memcpy(state->buffer + state->bufferedSize, input, len);
  state->bufferedSize += (XXH32_hash_t)len;
  return XXH_OK;
  }
- /* total input is now > XXH3_INTERNALBUFFER_SIZE */

+ /* total input is now > XXH3_INTERNALBUFFER_SIZE */
  #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
  XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */

@@ -4752,7 +5303,7 @@ XXH3_update(XXH3_state_t* state,
  size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
  XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
  input += loadSize;
- XXH3_consumeStripes(state->acc,
+ XXH3_consumeStripes(acc,
  &state->nbStripesSoFar, state->nbStripesPerBlock,
  state->buffer, XXH3_INTERNALBUFFER_STRIPES,
  secret, state->secretLimit,
@@ -4761,31 +5312,68 @@ XXH3_update(XXH3_state_t* state,
  }
  XXH_ASSERT(input < bEnd);

- /* Consume input by a multiple of internal buffer size */
- if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
- const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
- do {
- XXH3_consumeStripes(state->acc,
- &state->nbStripesSoFar, state->nbStripesPerBlock,
- input, XXH3_INTERNALBUFFER_STRIPES,
- secret, state->secretLimit,
- f_acc512, f_scramble);
- input += XXH3_INTERNALBUFFER_SIZE;
- } while (input<limit);
- /* for last partial stripe */
- memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ /* large input to consume : ingest per full block */
+ if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+ size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
+ XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
+ /* join to current block's end */
+ { size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
+ XXH_ASSERT(nbStripesToEnd <= nbStripes);
+ XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
+ f_scramble(acc, secret + state->secretLimit);
+ state->nbStripesSoFar = 0;
+ input += nbStripesToEnd * XXH_STRIPE_LEN;
+ nbStripes -= nbStripesToEnd;
+ }
+ /* consume per entire blocks */
+ while(nbStripes >= state->nbStripesPerBlock) {
+ XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
+ f_scramble(acc, secret + state->secretLimit);
+ input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
+ nbStripes -= state->nbStripesPerBlock;
+ }
+ /* consume last partial block */
+ XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
+ input += nbStripes * XXH_STRIPE_LEN;
+ XXH_ASSERT(input < bEnd); /* at least some bytes left */
+ state->nbStripesSoFar = nbStripes;
+ /* buffer predecessor of last partial stripe */
+ XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
+ } else {
+ /* content to consume <= block size */
+ /* Consume input by a multiple of internal buffer size */
+ if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
+ const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
+ do {
+ XXH3_consumeStripes(acc,
+ &state->nbStripesSoFar, state->nbStripesPerBlock,
+ input, XXH3_INTERNALBUFFER_STRIPES,
+ secret, state->secretLimit,
+ f_acc512, f_scramble);
+ input += XXH3_INTERNALBUFFER_SIZE;
+ } while (input<limit);
+ /* buffer predecessor of last partial stripe */
+ XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
+ }
  }
- XXH_ASSERT(input < bEnd);

  /* Some remaining input (always) : buffer it */
+ XXH_ASSERT(input < bEnd);
+ XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
+ XXH_ASSERT(state->bufferedSize == 0);
  XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
  state->bufferedSize = (XXH32_hash_t)(bEnd-input);
+ #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
+ /* save stack accumulators into state */
+ memcpy(state->acc, acc, sizeof(acc));
+ #endif
  }

  return XXH_OK;
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
  {
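The XXH3_STREAM_USE_STACK comment above boils down to a general pattern: copy hot state into a local array once, let the compiler keep it in registers across the loop, and write it back once. A generic illustration of that pattern (the arithmetic is a stand-in, not xxHash's kernel):

#include <string.h>
#include <stdint.h>

typedef struct { uint64_t acc[8]; } state_t;

static void consume(state_t* const state, const uint64_t* words, size_t n)
{
    uint64_t acc[8];
    size_t i, lane;
    memcpy(acc, state->acc, sizeof(acc));          /* load once */
    for (i = 0; i < n; i++)
        for (lane = 0; lane < 8; lane++)
            acc[lane] += words[i] * (2*lane + 1);  /* stand-in arithmetic */
    memcpy(state->acc, acc, sizeof(acc));          /* store once */
}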
@@ -4803,7 +5391,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
  * Digest on a local copy. This way, the state remains unaltered, and it can
  * continue ingesting more input afterwards.
  */
- memcpy(acc, state->acc, sizeof(state->acc));
+ XXH_memcpy(acc, state->acc, sizeof(state->acc));
  if (state->bufferedSize >= XXH_STRIPE_LEN) {
  size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
  size_t nbStripesSoFar = state->nbStripesSoFar;
@@ -4820,15 +5408,15 @@ XXH3_digest_long (XXH64_hash_t* acc,
  xxh_u8 lastStripe[XXH_STRIPE_LEN];
  size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
  XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
- memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
- memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
+ XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
+ XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
  XXH3_accumulate_512(acc,
  lastStripe,
  secret + state->secretLimit - XXH_SECRET_LASTACC_START);
  }
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
  {
  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
@@ -4840,57 +5428,12 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
  (xxh_u64)state->totalLen * XXH_PRIME64_1);
  }
  /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
- if (state->seed)
+ if (state->useSeed)
  return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
  return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
  secret, state->secretLimit + XXH_STRIPE_LEN);
  }
-
-
- #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
- /*! @ingroup xxh3_family */
- XXH_PUBLIC_API void
- XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
- {
- XXH_ASSERT(secretBuffer != NULL);
- if (customSeedSize == 0) {
- memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
- return;
- }
- XXH_ASSERT(customSeed != NULL);
-
- { size_t const segmentSize = sizeof(XXH128_hash_t);
- size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
- XXH128_canonical_t scrambler;
- XXH64_hash_t seeds[12];
- size_t segnb;
- XXH_ASSERT(nbSegments == 12);
- XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
- XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
-
- /*
- * Copy customSeed to seeds[], truncating or repeating as necessary.
- */
- { size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
- size_t filled = toFill;
- memcpy(seeds, customSeed, toFill);
- while (filled < sizeof(seeds)) {
- toFill = XXH_MIN(filled, sizeof(seeds) - filled);
- memcpy((char*)seeds + filled, seeds, toFill);
- filled += toFill;
- } }
-
- /* generate secret */
- memcpy(secretBuffer, &scrambler, sizeof(scrambler));
- for (segnb=1; segnb < nbSegments; segnb++) {
- size_t const segmentStart = segnb * segmentSize;
- XXH128_canonical_t segment;
- XXH128_canonicalFromHash(&segment,
- XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) );
- memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
- } }
- }
+ #endif /* !XXH_NO_STREAM */


  /* ==========================================
@@ -4910,7 +5453,7 @@ XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSee
  * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
  */

- XXH_FORCE_INLINE XXH128_hash_t
+ XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
  XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
  {
  /* A doubled version of 1to3_64b with different constants. */
@@ -4939,7 +5482,7 @@ XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
  }
  }

- XXH_FORCE_INLINE XXH128_hash_t
+ XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
  XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
  {
  XXH_ASSERT(input != NULL);
@@ -4966,7 +5509,7 @@ XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
  }
  }

- XXH_FORCE_INLINE XXH128_hash_t
+ XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
  XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
  {
  XXH_ASSERT(input != NULL);
@@ -5041,7 +5584,7 @@ XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
  /*
  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
  */
- XXH_FORCE_INLINE XXH128_hash_t
+ XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
  XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
  {
  XXH_ASSERT(len <= 16);
@@ -5072,7 +5615,7 @@ XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
  }


- XXH_FORCE_INLINE XXH128_hash_t
+ XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
  XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
  XXH64_hash_t seed)
@@ -5083,6 +5626,16 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
  { XXH128_hash_t acc;
  acc.low64 = len * XXH_PRIME64_1;
  acc.high64 = 0;
+
+ #if XXH_SIZE_OPT >= 1
+ {
+ /* Smaller, but slightly slower. */
+ size_t i = (len - 1) / 32;
+ do {
+ acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+ } while (i-- != 0);
+ }
+ #else
  if (len > 32) {
  if (len > 64) {
  if (len > 96) {
@@ -5093,6 +5646,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
  acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
  }
  acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+ #endif
  { XXH128_hash_t h128;
  h128.low64 = acc.low64 + acc.high64;
  h128.high64 = (acc.low64 * XXH_PRIME64_1)
@@ -5105,7 +5659,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
  }
  }

- XXH_NO_INLINE XXH128_hash_t
+ XXH_NO_INLINE XXH_PUREF XXH128_hash_t
  XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
  const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
  XXH64_hash_t seed)
@@ -5180,9 +5734,9 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
  }

  /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance that XXH3_hashLong() is not inlined.
  */
- XXH_NO_INLINE XXH128_hash_t
+ XXH_NO_INLINE XXH_PUREF XXH128_hash_t
  XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
  XXH64_hash_t seed64,
  const void* XXH_RESTRICT secret, size_t secretLen)
@@ -5193,9 +5747,10 @@ XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
  }

  /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance to pass @p secretLen (when it's static)
+ * to the compiler, so that it can properly optimize the vectorized loop.
  */
- XXH_NO_INLINE XXH128_hash_t
+ XXH_FORCE_INLINE XXH128_hash_t
  XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
  XXH64_hash_t seed64,
  const void* XXH_RESTRICT secret, size_t secretLen)
@@ -5262,7 +5817,7 @@ XXH3_128bits_internal(const void* input, size_t len,

  /* === Public XXH128 API === */

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
  {
  return XXH3_128bits_internal(input, len, 0,
@@ -5270,7 +5825,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
  XXH3_hashLong_128b_default);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH128_hash_t
  XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
  {
@@ -5279,7 +5834,7 @@ XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_
  XXH3_hashLong_128b_withSecret);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH128_hash_t
  XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
  {
@@ -5288,7 +5843,16 @@ XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
  XXH3_hashLong_128b_withSeed);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
+ XXH_PUBLIC_API XXH128_hash_t
+ XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+ {
+ if (len <= XXH3_MIDSIZE_MAX)
+ return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+ return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
+ }
+
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH128_hash_t
  XXH128(const void* input, size_t len, XXH64_hash_t seed)
  {
@@ -5297,44 +5861,41 @@ XXH128(const void* input, size_t len, XXH64_hash_t seed)


  /* === XXH3 128-bit streaming === */
-
+ #ifndef XXH_NO_STREAM
  /*
- * All the functions are actually the same as for 64-bit streaming variant.
+ * All initialization and update functions are identical to the 64-bit streaming variant.
  * The only difference is the finalization routine.
  */

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_128bits_reset(XXH3_state_t* statePtr)
  {
- if (statePtr == NULL) return XXH_ERROR;
- XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
- return XXH_OK;
+ return XXH3_64bits_reset(statePtr);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
  {
- if (statePtr == NULL) return XXH_ERROR;
- XXH3_reset_internal(statePtr, 0, secret, secretSize);
- if (secret == NULL) return XXH_ERROR;
- if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
- return XXH_OK;
+ return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
  {
- if (statePtr == NULL) return XXH_ERROR;
- if (seed==0) return XXH3_128bits_reset(statePtr);
- if (seed != statePtr->seed) XXH3_initCustomSecret(statePtr->customSecret, seed);
- XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
- return XXH_OK;
+ return XXH3_64bits_reset_withSeed(statePtr, seed);
+ }
+
+ /*! @ingroup XXH3_family */
+ XXH_PUBLIC_API XXH_errorcode
+ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+ {
+ return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH_errorcode
  XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
  {
@@ -5342,7 +5903,7 @@ XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
  XXH3_accumulate_512, XXH3_scrambleAcc);
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
  {
  const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
@@ -5367,13 +5928,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
  return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
  secret, state->secretLimit + XXH_STRIPE_LEN);
  }
-
+ #endif /* !XXH_NO_STREAM */
  /* 128-bit utility functions */

  #include <string.h> /* memcmp, memcpy */

  /* return : 1 if equal, 0 if different */
- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
  {
  /* note : XXH128_hash_t is compact, it has no padding byte */
@@ -5381,10 +5942,10 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
  }

  /* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1 > *h128_2
- * <0 if *h128_1 < *h128_2
- * =0 if *h128_1 == *h128_2 */
- /*! @ingroup xxh3_family */
+ * @return : >0 if *h128_1 > *h128_2
+ * <0 if *h128_1 < *h128_2
+ * =0 if *h128_1 == *h128_2 */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
  {
  XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
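Because XXH128_cmp carries the qsort() prototype, an array of XXH128_hash_t can be sorted with it directly. A short usage sketch (not from the package):

#include "xxhash.h"
#include <stdlib.h>

static void sort_hashes(XXH128_hash_t* hashes, size_t count)
{
    qsort(hashes, count, sizeof(XXH128_hash_t), XXH128_cmp);
}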
@@ -5397,7 +5958,7 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)


  /*====== Canonical representation ======*/
- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API void
  XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
  {
@@ -5406,11 +5967,11 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
  hash.high64 = XXH_swap64(hash.high64);
  hash.low64 = XXH_swap64(hash.low64);
  }
- memcpy(dst, &hash.high64, sizeof(hash.high64));
- memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
+ XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
+ XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
  }

- /*! @ingroup xxh3_family */
+ /*! @ingroup XXH3_family */
  XXH_PUBLIC_API XXH128_hash_t
  XXH128_hashFromCanonical(const XXH128_canonical_t* src)
  {
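The canonical form is a fixed big-endian byte layout, so it is the representation to write to disk or send over the wire; XXH128_hashFromCanonical restores the native struct. A small round-trip sketch (not from the package):

#include "xxhash.h"

static int canonical_roundtrip(XXH128_hash_t h)
{
    XXH128_canonical_t wire;
    XXH128_hash_t back;
    XXH128_canonicalFromHash(&wire, h);      /* serialize, endian-stable */
    back = XXH128_hashFromCanonical(&wire);  /* deserialize */
    return XXH128_isEqual(h, back);          /* always 1 */
}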
@@ -5420,10 +5981,81 @@ XXH128_hashFromCanonical(const XXH128_canonical_t* src)
  return h;
  }

+
+
+ /* ==========================================
+ * Secret generators
+ * ==========================================
+ */
+ #define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+ XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
+ {
+ XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
+ XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
+ }
+
+ /*! @ingroup XXH3_family */
+ XXH_PUBLIC_API XXH_errorcode
+ XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+ {
+ #if (XXH_DEBUGLEVEL >= 1)
+ XXH_ASSERT(secretBuffer != NULL);
+ XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
+ #else
+ /* production mode, assert() is disabled */
+ if (secretBuffer == NULL) return XXH_ERROR;
+ if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
+ #endif
+
+ if (customSeedSize == 0) {
+ customSeed = XXH3_kSecret;
+ customSeedSize = XXH_SECRET_DEFAULT_SIZE;
+ }
+ #if (XXH_DEBUGLEVEL >= 1)
+ XXH_ASSERT(customSeed != NULL);
+ #else
+ if (customSeed == NULL) return XXH_ERROR;
+ #endif
+
+ /* Fill secretBuffer with a copy of customSeed - repeat as needed */
+ { size_t pos = 0;
+ while (pos < secretSize) {
+ size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
+ memcpy((char*)secretBuffer + pos, customSeed, toCopy);
+ pos += toCopy;
+ } }
+
+ { size_t const nbSeg16 = secretSize / 16;
+ size_t n;
+ XXH128_canonical_t scrambler;
+ XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
+ for (n=0; n<nbSeg16; n++) {
+ XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
+ XXH3_combine16((char*)secretBuffer + n*16, h128);
+ }
+ /* last segment */
+ XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
+ }
+ return XXH_OK;
+ }
+
+ /*! @ingroup XXH3_family */
+ XXH_PUBLIC_API void
+ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+ {
+ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
+ XXH3_initCustomSecret(secret, seed);
+ XXH_ASSERT(secretBuffer != NULL);
+ memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
+ }
+
+
+
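A usage sketch for the rewritten secret generators above: expand low-entropy seed material into a full secret with XXH3_generateSecret, then hash with it (not from the package; these entry points may require XXH_STATIC_LINKING_ONLY in this version):

#define XXH_STATIC_LINKING_ONLY   /* assumption: may be needed for this API here */
#include "xxhash.h"
#include <stdio.h>
#include <string.h>

int main(void)
{
    unsigned char secret[XXH3_SECRET_SIZE_MIN];
    const char* seedMaterial = "not very random";
    const char* msg = "payload";

    if (XXH3_generateSecret(secret, sizeof(secret),
                            seedMaterial, strlen(seedMaterial)) != XXH_OK)
        return 1;
    printf("%016llx\n", (unsigned long long)
           XXH3_64bits_withSecret(msg, strlen(msg), secret, sizeof(secret)));
    return 0;
}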
  /* Pop our optimization override from above */
  #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
- && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+ && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
  # pragma GCC pop_options
  #endif