@nxtedition/rocksdb 8.1.3 → 8.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/deps/rocksdb/rocksdb/CMakeLists.txt +13 -1
- package/deps/rocksdb/rocksdb/Makefile +2 -2
- package/deps/rocksdb/rocksdb/TARGETS +4 -2
- package/deps/rocksdb/rocksdb/cache/cache_bench_tool.cc +32 -35
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.cc +0 -30
- package/deps/rocksdb/rocksdb/cache/cache_entry_roles.h +0 -83
- package/deps/rocksdb/rocksdb/cache/cache_entry_stats.h +13 -14
- package/deps/rocksdb/rocksdb/cache/cache_helpers.cc +40 -0
- package/deps/rocksdb/rocksdb/cache/cache_helpers.h +14 -20
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.cc +8 -9
- package/deps/rocksdb/rocksdb/cache/cache_reservation_manager.h +5 -4
- package/deps/rocksdb/rocksdb/cache/cache_test.cc +124 -156
- package/deps/rocksdb/rocksdb/cache/charged_cache.cc +10 -26
- package/deps/rocksdb/rocksdb/cache/charged_cache.h +11 -16
- package/deps/rocksdb/rocksdb/cache/clock_cache.cc +35 -32
- package/deps/rocksdb/rocksdb/cache/clock_cache.h +19 -21
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.cc +42 -30
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache.h +9 -8
- package/deps/rocksdb/rocksdb/cache/compressed_secondary_cache_test.cc +91 -143
- package/deps/rocksdb/rocksdb/cache/lru_cache.cc +54 -60
- package/deps/rocksdb/rocksdb/cache/lru_cache.h +37 -63
- package/deps/rocksdb/rocksdb/cache/lru_cache_test.cc +120 -106
- package/deps/rocksdb/rocksdb/cache/secondary_cache.cc +14 -5
- package/deps/rocksdb/rocksdb/cache/sharded_cache.h +16 -31
- package/deps/rocksdb/rocksdb/cache/typed_cache.h +339 -0
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.cc +0 -48
- package/deps/rocksdb/rocksdb/db/blob/blob_contents.h +18 -15
- package/deps/rocksdb/rocksdb/db/blob/blob_counting_iterator.h +0 -11
- package/deps/rocksdb/rocksdb/db/blob/blob_file_builder.cc +5 -26
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.cc +7 -8
- package/deps/rocksdb/rocksdb/db/blob/blob_file_cache.h +6 -3
- package/deps/rocksdb/rocksdb/db/blob/blob_file_reader.cc +2 -7
- package/deps/rocksdb/rocksdb/db/blob/blob_source.cc +19 -47
- package/deps/rocksdb/rocksdb/db/blob/blob_source.h +13 -5
- package/deps/rocksdb/rocksdb/db/blob/blob_source_test.cc +15 -22
- package/deps/rocksdb/rocksdb/db/builder.cc +17 -12
- package/deps/rocksdb/rocksdb/db/column_family.cc +0 -1
- package/deps/rocksdb/rocksdb/db/column_family.h +0 -6
- package/deps/rocksdb/rocksdb/db/compaction/clipping_iterator.h +0 -5
- package/deps/rocksdb/rocksdb/db/compaction/compaction.cc +3 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction.h +0 -2
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.cc +28 -27
- package/deps/rocksdb/rocksdb/db/compaction/compaction_iterator.h +2 -17
- package/deps/rocksdb/rocksdb/db/compaction/compaction_job.cc +1 -0
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.cc +254 -139
- package/deps/rocksdb/rocksdb/db/compaction/compaction_outputs.h +7 -5
- package/deps/rocksdb/rocksdb/db/compaction/subcompaction_state.h +0 -5
- package/deps/rocksdb/rocksdb/db/db_basic_test.cc +98 -9
- package/deps/rocksdb/rocksdb/db/db_block_cache_test.cc +28 -28
- package/deps/rocksdb/rocksdb/db/db_compaction_test.cc +125 -0
- package/deps/rocksdb/rocksdb/db/db_flush_test.cc +65 -4
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.cc +1 -1
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl.h +27 -15
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc +78 -49
- package/deps/rocksdb/rocksdb/db/db_impl/db_impl_write.cc +34 -24
- package/deps/rocksdb/rocksdb/db/db_iter.cc +8 -2
- package/deps/rocksdb/rocksdb/db/db_merge_operand_test.cc +42 -0
- package/deps/rocksdb/rocksdb/db/db_merge_operator_test.cc +155 -0
- package/deps/rocksdb/rocksdb/db/db_properties_test.cc +12 -12
- package/deps/rocksdb/rocksdb/db/db_range_del_test.cc +117 -210
- package/deps/rocksdb/rocksdb/db/db_test_util.cc +11 -10
- package/deps/rocksdb/rocksdb/db/db_test_util.h +36 -24
- package/deps/rocksdb/rocksdb/db/db_with_timestamp_basic_test.cc +28 -0
- package/deps/rocksdb/rocksdb/db/flush_job.cc +6 -6
- package/deps/rocksdb/rocksdb/db/flush_job.h +3 -2
- package/deps/rocksdb/rocksdb/db/flush_job_test.cc +29 -29
- package/deps/rocksdb/rocksdb/db/history_trimming_iterator.h +0 -4
- package/deps/rocksdb/rocksdb/db/internal_stats.cc +11 -11
- package/deps/rocksdb/rocksdb/db/internal_stats.h +2 -2
- package/deps/rocksdb/rocksdb/db/log_reader.cc +8 -6
- package/deps/rocksdb/rocksdb/db/log_test.cc +35 -2
- package/deps/rocksdb/rocksdb/db/memtable.cc +30 -5
- package/deps/rocksdb/rocksdb/db/merge_helper.cc +47 -33
- package/deps/rocksdb/rocksdb/db/merge_helper.h +14 -6
- package/deps/rocksdb/rocksdb/db/table_cache.cc +41 -91
- package/deps/rocksdb/rocksdb/db/table_cache.h +17 -19
- package/deps/rocksdb/rocksdb/db/table_cache_sync_and_async.h +7 -9
- package/deps/rocksdb/rocksdb/db/version_builder.cc +12 -9
- package/deps/rocksdb/rocksdb/db/version_edit.h +1 -0
- package/deps/rocksdb/rocksdb/db/version_set.cc +20 -28
- package/deps/rocksdb/rocksdb/db/version_set.h +2 -2
- package/deps/rocksdb/rocksdb/db/version_set_sync_and_async.h +1 -1
- package/deps/rocksdb/rocksdb/db/write_batch.cc +4 -1
- package/deps/rocksdb/rocksdb/file/file_prefetch_buffer.cc +1 -0
- package/deps/rocksdb/rocksdb/file/prefetch_test.cc +358 -214
- package/deps/rocksdb/rocksdb/include/rocksdb/cache.h +137 -135
- package/deps/rocksdb/rocksdb/include/rocksdb/merge_operator.h +21 -0
- package/deps/rocksdb/rocksdb/include/rocksdb/secondary_cache.h +8 -6
- package/deps/rocksdb/rocksdb/include/rocksdb/version.h +1 -1
- package/deps/rocksdb/rocksdb/memory/memory_allocator.h +9 -0
- package/deps/rocksdb/rocksdb/options/customizable_test.cc +4 -3
- package/deps/rocksdb/rocksdb/port/port_posix.h +2 -0
- package/{prebuilds → deps/rocksdb/rocksdb/prebuilds}/linux-x64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/src.mk +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/block.h +3 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc +25 -67
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_builder.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc +18 -13
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc +156 -223
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader.h +31 -50
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_impl.h +46 -18
- package/deps/rocksdb/rocksdb/table/block_based/block_based_table_reader_sync_and_async.h +3 -3
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.cc +96 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_cache.h +132 -0
- package/deps/rocksdb/rocksdb/table/block_based/cachable_entry.h +28 -0
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc +6 -5
- package/deps/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h +1 -4
- package/deps/rocksdb/rocksdb/table/block_based/full_filter_block.cc +6 -7
- package/deps/rocksdb/rocksdb/table/block_based/index_reader_common.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h +6 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc +19 -18
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h +9 -5
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc +3 -1
- package/deps/rocksdb/rocksdb/table/block_based/partitioned_index_reader.cc +2 -1
- package/deps/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc +2 -2
- package/deps/rocksdb/rocksdb/table/format.h +1 -1
- package/deps/rocksdb/rocksdb/table/get_context.cc +12 -3
- package/deps/rocksdb/rocksdb/table/internal_iterator.h +0 -2
- package/deps/rocksdb/rocksdb/table/merging_iterator.cc +92 -7
- package/deps/rocksdb/rocksdb/table/merging_iterator.h +0 -80
- package/deps/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc +66 -1
- package/deps/rocksdb/rocksdb/tools/db_bench_tool.cc +9 -2
- package/deps/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc +5 -0
- package/deps/rocksdb/rocksdb/trace_replay/trace_replay.cc +1 -1
- package/deps/rocksdb/rocksdb/util/async_file_reader.cc +20 -12
- package/deps/rocksdb/rocksdb/util/compression.cc +2 -2
- package/deps/rocksdb/rocksdb/util/compression.h +11 -2
- package/deps/rocksdb/rocksdb/util/xxhash.h +1901 -887
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.cc +35 -57
- package/deps/rocksdb/rocksdb/utilities/cache_dump_load_impl.h +4 -5
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.cc +11 -6
- package/deps/rocksdb/rocksdb/utilities/fault_injection_secondary_cache.h +6 -5
- package/deps/rocksdb/rocksdb/utilities/memory_allocators.h +0 -1
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc +10 -11
- package/deps/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc +31 -31
- package/deps/rocksdb/rocksdb/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h +4 -0
- package/deps/rocksdb/rocksdb/utilities/transactions/transaction_test.cc +52 -0
- package/deps/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc +1 -0
- package/deps/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc +12 -3
- package/deps/rocksdb/rocksdb.gyp +0 -3
- package/index.js +2 -2
- package/package.json +1 -1
- package/prebuilds/darwin-arm64/node.napi.node +0 -0
- package/deps/rocksdb/rocksdb/table/block_based/block_like_traits.h +0 -182
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.cc +0 -142
- package/deps/rocksdb/rocksdb/table/compaction_merging_iterator.h +0 -241
|
@@ -20,7 +20,7 @@
|
|
|
20
20
|
/*
|
|
21
21
|
* xxHash - Extremely Fast Hash algorithm
|
|
22
22
|
* Header File
|
|
23
|
-
* Copyright (C) 2012-
|
|
23
|
+
* Copyright (C) 2012-2021 Yann Collet
|
|
24
24
|
*
|
|
25
25
|
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
|
26
26
|
*
|
|
@@ -51,49 +51,142 @@
|
|
|
51
51
|
* - xxHash homepage: https://www.xxhash.com
|
|
52
52
|
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
|
53
53
|
*/
|
|
54
|
+
|
|
54
55
|
/*!
|
|
55
56
|
* @mainpage xxHash
|
|
56
57
|
*
|
|
58
|
+
* xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
|
|
59
|
+
* limits.
|
|
60
|
+
*
|
|
61
|
+
* It is proposed in four flavors, in three families:
|
|
62
|
+
* 1. @ref XXH32_family
|
|
63
|
+
* - Classic 32-bit hash function. Simple, compact, and runs on almost all
|
|
64
|
+
* 32-bit and 64-bit systems.
|
|
65
|
+
* 2. @ref XXH64_family
|
|
66
|
+
* - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
|
|
67
|
+
* 64-bit systems (but _not_ 32-bit systems).
|
|
68
|
+
* 3. @ref XXH3_family
|
|
69
|
+
* - Modern 64-bit and 128-bit hash function family which features improved
|
|
70
|
+
* strength and performance across the board, especially on smaller data.
|
|
71
|
+
* It benefits greatly from SIMD and 64-bit without requiring it.
|
|
72
|
+
*
|
|
73
|
+
* Benchmarks
|
|
74
|
+
* ---
|
|
75
|
+
* The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
|
|
76
|
+
* The open source benchmark program is compiled with clang v10.0 using -O3 flag.
|
|
77
|
+
*
|
|
78
|
+
* | Hash Name | ISA ext | Width | Large Data Speed | Small Data Velocity |
|
|
79
|
+
* | -------------------- | ------- | ----: | ---------------: | ------------------: |
|
|
80
|
+
* | XXH3_64bits() | @b AVX2 | 64 | 59.4 GB/s | 133.1 |
|
|
81
|
+
* | MeowHash | AES-NI | 128 | 58.2 GB/s | 52.5 |
|
|
82
|
+
* | XXH3_128bits() | @b AVX2 | 128 | 57.9 GB/s | 118.1 |
|
|
83
|
+
* | CLHash | PCLMUL | 64 | 37.1 GB/s | 58.1 |
|
|
84
|
+
* | XXH3_64bits() | @b SSE2 | 64 | 31.5 GB/s | 133.1 |
|
|
85
|
+
* | XXH3_128bits() | @b SSE2 | 128 | 29.6 GB/s | 118.1 |
|
|
86
|
+
* | RAM sequential read | | N/A | 28.0 GB/s | N/A |
|
|
87
|
+
* | ahash | AES-NI | 64 | 22.5 GB/s | 107.2 |
|
|
88
|
+
* | City64 | | 64 | 22.0 GB/s | 76.6 |
|
|
89
|
+
* | T1ha2 | | 64 | 22.0 GB/s | 99.0 |
|
|
90
|
+
* | City128 | | 128 | 21.7 GB/s | 57.7 |
|
|
91
|
+
* | FarmHash | AES-NI | 64 | 21.3 GB/s | 71.9 |
|
|
92
|
+
* | XXH64() | | 64 | 19.4 GB/s | 71.0 |
|
|
93
|
+
* | SpookyHash | | 64 | 19.3 GB/s | 53.2 |
|
|
94
|
+
* | Mum | | 64 | 18.0 GB/s | 67.0 |
|
|
95
|
+
* | CRC32C | SSE4.2 | 32 | 13.0 GB/s | 57.9 |
|
|
96
|
+
* | XXH32() | | 32 | 9.7 GB/s | 71.9 |
|
|
97
|
+
* | City32 | | 32 | 9.1 GB/s | 66.0 |
|
|
98
|
+
* | Blake3* | @b AVX2 | 256 | 4.4 GB/s | 8.1 |
|
|
99
|
+
* | Murmur3 | | 32 | 3.9 GB/s | 56.1 |
|
|
100
|
+
* | SipHash* | | 64 | 3.0 GB/s | 43.2 |
|
|
101
|
+
* | Blake3* | @b SSE2 | 256 | 2.4 GB/s | 8.1 |
|
|
102
|
+
* | HighwayHash | | 64 | 1.4 GB/s | 6.0 |
|
|
103
|
+
* | FNV64 | | 64 | 1.2 GB/s | 62.7 |
|
|
104
|
+
* | Blake2* | | 256 | 1.1 GB/s | 5.1 |
|
|
105
|
+
* | SHA1* | | 160 | 0.8 GB/s | 5.6 |
|
|
106
|
+
* | MD5* | | 128 | 0.6 GB/s | 7.8 |
|
|
107
|
+
* @note
|
|
108
|
+
* - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
|
|
109
|
+
* even though it is mandatory on x64.
|
|
110
|
+
* - Hashes with an asterisk are cryptographic. Note that MD5 is no longer considered secure
|
|
111
|
+
* by modern standards.
|
|
112
|
+
* - Small data velocity is a rough average of an algorithm's efficiency for small
|
|
113
|
+
* data. For more accurate information, see the wiki.
|
|
114
|
+
* - More benchmarks and strength tests are found on the wiki:
|
|
115
|
+
* https://github.com/Cyan4973/xxHash/wiki
|
|
116
|
+
*
|
|
117
|
+
* Usage
|
|
118
|
+
* ------
|
|
119
|
+
* All xxHash variants use a similar API. Changing the algorithm is a trivial
|
|
120
|
+
* substitution.
|
|
121
|
+
*
|
|
122
|
+
* @pre
|
|
123
|
+
* For functions which take an input and length parameter, the following
|
|
124
|
+
* requirements are assumed:
|
|
125
|
+
* - The range from [`input`, `input + length`) is valid, readable memory.
|
|
126
|
+
* - The only exception is if the `length` is `0`, `input` may be `NULL`.
|
|
127
|
+
* - For C++, the objects must have the *TriviallyCopyable* property, as the
|
|
128
|
+
* functions access bytes directly as if it was an array of `unsigned char`.
|
|
129
|
+
*
|
|
130
|
+
* @anchor single_shot_example
|
|
131
|
+
* **Single Shot**
|
|
132
|
+
*
|
|
133
|
+
* These functions are stateless functions which hash a contiguous block of memory,
|
|
134
|
+
* immediately returning the result. They are the easiest and usually the fastest
|
|
135
|
+
* option.
|
|
136
|
+
*
|
|
137
|
+
* XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
|
|
138
|
+
*
|
|
139
|
+
* @code{.c}
|
|
140
|
+
* #include <string.h>
|
|
141
|
+
* #include "xxhash.h"
|
|
142
|
+
*
|
|
143
|
+
* // Example for a function which hashes a null terminated string with XXH32().
|
|
144
|
+
* XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
|
|
145
|
+
* {
|
|
146
|
+
* // NULL pointers are only valid if the length is zero
|
|
147
|
+
* size_t length = (string == NULL) ? 0 : strlen(string);
|
|
148
|
+
* return XXH32(string, length, seed);
|
|
149
|
+
* }
|
|
150
|
+
* @endcode
|
|
151
|
+
*
|
|
152
|
+
* @anchor streaming_example
|
|
153
|
+
* **Streaming**
|
|
154
|
+
*
|
|
155
|
+
* These groups of functions allow incremental hashing of input of unknown size, even
|
|
156
|
+
* more than what would fit in a size_t.
|
|
157
|
+
*
|
|
158
|
+
* XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
|
|
159
|
+
*
|
|
160
|
+
* @code{.c}
|
|
161
|
+
* #include <stdio.h>
|
|
162
|
+
* #include <assert.h>
|
|
163
|
+
* #include "xxhash.h"
|
|
164
|
+
* // Example for a function which hashes a FILE incrementally with XXH3_64bits().
|
|
165
|
+
* XXH64_hash_t hashFile(FILE* f)
|
|
166
|
+
* {
|
|
167
|
+
* // Allocate a state struct. Do not just use malloc() or new.
|
|
168
|
+
* XXH3_state_t* state = XXH3_createState();
|
|
169
|
+
* assert(state != NULL && "Out of memory!");
|
|
170
|
+
* // Reset the state to start a new hashing session.
|
|
171
|
+
* XXH3_64bits_reset(state);
|
|
172
|
+
* char buffer[4096];
|
|
173
|
+
* size_t count;
|
|
174
|
+
* // Read the file in chunks
|
|
175
|
+
* while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
|
|
176
|
+
* // Run update() as many times as necessary to process the data
|
|
177
|
+
* XXH3_64bits_update(state, buffer, count);
|
|
178
|
+
* }
|
|
179
|
+
* // Retrieve the finalized hash. This will not change the state.
|
|
180
|
+
* XXH64_hash_t result = XXH3_64bits_digest(state);
|
|
181
|
+
* // Free the state. Do not use free().
|
|
182
|
+
* XXH3_freeState(state);
|
|
183
|
+
* return result;
|
|
184
|
+
* }
|
|
185
|
+
* @endcode
|
|
186
|
+
*
|
|
57
187
|
* @file xxhash.h
|
|
58
188
|
* xxHash prototypes and implementation
|
|
59
189
|
*/
|
|
60
|
-
/* TODO: update */
|
|
61
|
-
/* Notice extracted from xxHash homepage:
|
|
62
|
-
|
|
63
|
-
xxHash is an extremely fast hash algorithm, running at RAM speed limits.
|
|
64
|
-
It also successfully passes all tests from the SMHasher suite.
|
|
65
|
-
|
|
66
|
-
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
|
|
67
|
-
|
|
68
|
-
Name Speed Q.Score Author
|
|
69
|
-
xxHash 5.4 GB/s 10
|
|
70
|
-
CrapWow 3.2 GB/s 2 Andrew
|
|
71
|
-
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
|
|
72
|
-
SpookyHash 2.0 GB/s 10 Bob Jenkins
|
|
73
|
-
SBox 1.4 GB/s 9 Bret Mulvey
|
|
74
|
-
Lookup3 1.2 GB/s 9 Bob Jenkins
|
|
75
|
-
SuperFastHash 1.2 GB/s 1 Paul Hsieh
|
|
76
|
-
CityHash64 1.05 GB/s 10 Pike & Alakuijala
|
|
77
|
-
FNV 0.55 GB/s 5 Fowler, Noll, Vo
|
|
78
|
-
CRC32 0.43 GB/s 9
|
|
79
|
-
MD5-32 0.33 GB/s 10 Ronald L. Rivest
|
|
80
|
-
SHA1-32 0.28 GB/s 10
|
|
81
|
-
|
|
82
|
-
Q.Score is a measure of quality of the hash function.
|
|
83
|
-
It depends on successfully passing SMHasher test set.
|
|
84
|
-
10 is a perfect score.
|
|
85
|
-
|
|
86
|
-
Note: SMHasher's CRC32 implementation is not the fastest one.
|
|
87
|
-
Other speed-oriented implementations can be faster,
|
|
88
|
-
especially in combination with PCLMUL instruction:
|
|
89
|
-
https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
|
|
90
|
-
|
|
91
|
-
A 64-bit version, named XXH64, is available since r35.
|
|
92
|
-
It offers much better speed, but for 64-bit applications only.
|
|
93
|
-
Name Speed on 64 bits Speed on 32 bits
|
|
94
|
-
XXH64 13.8 GB/s 1.9 GB/s
|
|
95
|
-
XXH32 6.8 GB/s 6.0 GB/s
|
|
96
|
-
*/
|
|
97
190
|
|
|
98
191
|
#if defined (__cplusplus)
|
|
99
192
|
extern "C" {
|
|
@@ -103,21 +196,53 @@ extern "C" {
|
|
|
103
196
|
* INLINE mode
|
|
104
197
|
******************************/
|
|
105
198
|
/*!
|
|
106
|
-
*
|
|
199
|
+
* @defgroup public Public API
|
|
200
|
+
* Contains details on the public xxHash functions.
|
|
201
|
+
* @{
|
|
202
|
+
*/
|
|
203
|
+
#ifdef XXH_DOXYGEN
|
|
204
|
+
/*!
|
|
205
|
+
* @brief Exposes the implementation and marks all functions as `inline`.
|
|
206
|
+
*
|
|
107
207
|
* Use these build macros to inline xxhash into the target unit.
|
|
108
208
|
* Inlining improves performance on small inputs, especially when the length is
|
|
109
209
|
* expressed as a compile-time constant:
|
|
110
210
|
*
|
|
111
|
-
*
|
|
211
|
+
* https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
|
|
112
212
|
*
|
|
113
213
|
* It also keeps xxHash symbols private to the unit, so they are not exported.
|
|
114
214
|
*
|
|
115
215
|
* Usage:
|
|
216
|
+
* @code{.c}
|
|
116
217
|
* #define XXH_INLINE_ALL
|
|
117
218
|
* #include "xxhash.h"
|
|
118
|
-
*
|
|
219
|
+
* @endcode
|
|
119
220
|
* Do not compile and link xxhash.o as a separate object, as it is not useful.
|
|
120
221
|
*/
|
|
222
|
+
# define XXH_INLINE_ALL
|
|
223
|
+
# undef XXH_INLINE_ALL
|
|
224
|
+
/*!
|
|
225
|
+
* @brief Exposes the implementation without marking functions as inline.
|
|
226
|
+
*/
|
|
227
|
+
# define XXH_PRIVATE_API
|
|
228
|
+
# undef XXH_PRIVATE_API
|
|
229
|
+
/*!
|
|
230
|
+
* @brief Emulate a namespace by transparently prefixing all symbols.
|
|
231
|
+
*
|
|
232
|
+
* If you want to include _and expose_ xxHash functions from within your own
|
|
233
|
+
* library, but also want to avoid symbol collisions with other libraries which
|
|
234
|
+
* may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
|
|
235
|
+
* any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
|
|
236
|
+
* (therefore, avoid empty or numeric values).
|
|
237
|
+
*
|
|
238
|
+
* Note that no change is required within the calling program as long as it
|
|
239
|
+
* includes `xxhash.h`: Regular symbol names will be automatically translated
|
|
240
|
+
* by this header.
|
|
241
|
+
*/
|
|
242
|
+
# define XXH_NAMESPACE /* YOUR NAME HERE */
|
|
243
|
+
# undef XXH_NAMESPACE
|
|
244
|
+
#endif
|
|
245
|
+
|
|
121
246
|
#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
|
|
122
247
|
&& !defined(XXH_INLINE_ALL_31684351384)
|
|
123
248
|
/* this section should be traversed only once */
|
|
@@ -140,29 +265,80 @@ extern "C" {
|
|
|
140
265
|
|
|
141
266
|
/*
|
|
142
267
|
* This part deals with the special case where a unit wants to inline xxHash,
|
|
143
|
-
* but "xxhash.h" has previously been included without XXH_INLINE_ALL,
|
|
144
|
-
* as part of some previously included *.h header file.
|
|
268
|
+
* but "xxhash.h" has previously been included without XXH_INLINE_ALL,
|
|
269
|
+
* such as part of some previously included *.h header file.
|
|
145
270
|
* Without further action, the new include would just be ignored,
|
|
146
271
|
* and functions would effectively _not_ be inlined (silent failure).
|
|
147
272
|
* The following macros solve this situation by prefixing all inlined names,
|
|
148
273
|
* avoiding naming collision with previous inclusions.
|
|
149
274
|
*/
|
|
150
|
-
#
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
#
|
|
275
|
+
/* Before that, we unconditionally #undef all symbols,
|
|
276
|
+
* in case they were already defined with XXH_NAMESPACE.
|
|
277
|
+
* They will then be redefined for XXH_INLINE_ALL
|
|
278
|
+
*/
|
|
279
|
+
# undef XXH_versionNumber
|
|
280
|
+
/* XXH32 */
|
|
281
|
+
# undef XXH32
|
|
282
|
+
# undef XXH32_createState
|
|
283
|
+
# undef XXH32_freeState
|
|
284
|
+
# undef XXH32_reset
|
|
285
|
+
# undef XXH32_update
|
|
286
|
+
# undef XXH32_digest
|
|
287
|
+
# undef XXH32_copyState
|
|
288
|
+
# undef XXH32_canonicalFromHash
|
|
289
|
+
# undef XXH32_hashFromCanonical
|
|
290
|
+
/* XXH64 */
|
|
291
|
+
# undef XXH64
|
|
292
|
+
# undef XXH64_createState
|
|
293
|
+
# undef XXH64_freeState
|
|
294
|
+
# undef XXH64_reset
|
|
295
|
+
# undef XXH64_update
|
|
296
|
+
# undef XXH64_digest
|
|
297
|
+
# undef XXH64_copyState
|
|
298
|
+
# undef XXH64_canonicalFromHash
|
|
299
|
+
# undef XXH64_hashFromCanonical
|
|
300
|
+
/* XXH3_64bits */
|
|
301
|
+
# undef XXH3_64bits
|
|
302
|
+
# undef XXH3_64bits_withSecret
|
|
303
|
+
# undef XXH3_64bits_withSeed
|
|
304
|
+
# undef XXH3_64bits_withSecretandSeed
|
|
305
|
+
# undef XXH3_createState
|
|
306
|
+
# undef XXH3_freeState
|
|
307
|
+
# undef XXH3_copyState
|
|
308
|
+
# undef XXH3_64bits_reset
|
|
309
|
+
# undef XXH3_64bits_reset_withSeed
|
|
310
|
+
# undef XXH3_64bits_reset_withSecret
|
|
311
|
+
# undef XXH3_64bits_update
|
|
312
|
+
# undef XXH3_64bits_digest
|
|
313
|
+
# undef XXH3_generateSecret
|
|
314
|
+
/* XXH3_128bits */
|
|
315
|
+
# undef XXH128
|
|
316
|
+
# undef XXH3_128bits
|
|
317
|
+
# undef XXH3_128bits_withSeed
|
|
318
|
+
# undef XXH3_128bits_withSecret
|
|
319
|
+
# undef XXH3_128bits_reset
|
|
320
|
+
# undef XXH3_128bits_reset_withSeed
|
|
321
|
+
# undef XXH3_128bits_reset_withSecret
|
|
322
|
+
# undef XXH3_128bits_reset_withSecretandSeed
|
|
323
|
+
# undef XXH3_128bits_update
|
|
324
|
+
# undef XXH3_128bits_digest
|
|
325
|
+
# undef XXH128_isEqual
|
|
326
|
+
# undef XXH128_cmp
|
|
327
|
+
# undef XXH128_canonicalFromHash
|
|
328
|
+
# undef XXH128_hashFromCanonical
|
|
329
|
+
/* Finally, free the namespace itself */
|
|
330
|
+
# undef XXH_NAMESPACE
|
|
331
|
+
|
|
332
|
+
/* employ the namespace for XXH_INLINE_ALL */
|
|
157
333
|
# define XXH_NAMESPACE XXH_INLINE_
|
|
158
334
|
/*
|
|
159
|
-
* Some identifiers (enums, type names) are not symbols,
|
|
160
|
-
*
|
|
335
|
+
* Some identifiers (enums, type names) are not symbols,
|
|
336
|
+
* but they must nonetheless be renamed to avoid redeclaration.
|
|
161
337
|
* Alternative solution: do not redeclare them.
|
|
162
|
-
* However, this requires some #ifdefs, and
|
|
163
|
-
* Meanwhile, renaming can be achieved in a single
|
|
338
|
+
* However, this requires some #ifdefs, and has a more dispersed impact.
|
|
339
|
+
* Meanwhile, renaming can be achieved in a single place.
|
|
164
340
|
*/
|
|
165
|
-
# define XXH_IPREF(Id)
|
|
341
|
+
# define XXH_IPREF(Id) XXH_NAMESPACE ## Id
|
|
166
342
|
# define XXH_OK XXH_IPREF(XXH_OK)
|
|
167
343
|
# define XXH_ERROR XXH_IPREF(XXH_ERROR)
|
|
168
344
|
# define XXH_errorcode XXH_IPREF(XXH_errorcode)
|
|
@@ -181,21 +357,13 @@ extern "C" {
|
|
|
181
357
|
# undef XXHASH_H_STATIC_13879238742
|
|
182
358
|
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
|
|
183
359
|
|
|
184
|
-
|
|
185
|
-
|
|
186
360
|
/* ****************************************************************
|
|
187
361
|
* Stable API
|
|
188
362
|
*****************************************************************/
|
|
189
363
|
#ifndef XXHASH_H_5627135585666179
|
|
190
364
|
#define XXHASH_H_5627135585666179 1
|
|
191
365
|
|
|
192
|
-
|
|
193
|
-
/*!
|
|
194
|
-
* @defgroup public Public API
|
|
195
|
-
* Contains details on the public xxHash functions.
|
|
196
|
-
* @{
|
|
197
|
-
*/
|
|
198
|
-
/* specific declaration modes for Windows */
|
|
366
|
+
/*! @brief Marks a global symbol. */
|
|
199
367
|
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
|
|
200
368
|
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
|
|
201
369
|
# ifdef XXH_EXPORT
|
|
@@ -208,24 +376,6 @@ extern "C" {
|
|
|
208
376
|
# endif
|
|
209
377
|
#endif
|
|
210
378
|
|
|
211
|
-
#ifdef XXH_DOXYGEN
|
|
212
|
-
/*!
|
|
213
|
-
* @brief Emulate a namespace by transparently prefixing all symbols.
|
|
214
|
-
*
|
|
215
|
-
* If you want to include _and expose_ xxHash functions from within your own
|
|
216
|
-
* library, but also want to avoid symbol collisions with other libraries which
|
|
217
|
-
* may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
|
|
218
|
-
* any public symbol from xxhash library with the value of XXH_NAMESPACE
|
|
219
|
-
* (therefore, avoid empty or numeric values).
|
|
220
|
-
*
|
|
221
|
-
* Note that no change is required within the calling program as long as it
|
|
222
|
-
* includes `xxhash.h`: Regular symbol names will be automatically translated
|
|
223
|
-
* by this header.
|
|
224
|
-
*/
|
|
225
|
-
# define XXH_NAMESPACE /* YOUR NAME HERE */
|
|
226
|
-
# undef XXH_NAMESPACE
|
|
227
|
-
#endif
|
|
228
|
-
|
|
229
379
|
#ifdef XXH_NAMESPACE
|
|
230
380
|
# define XXH_CAT(A,B) A##B
|
|
231
381
|
# define XXH_NAME2(A,B) XXH_CAT(A,B)
|
|
@@ -254,23 +404,28 @@ extern "C" {
|
|
|
254
404
|
# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
|
|
255
405
|
# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
|
|
256
406
|
# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
|
|
407
|
+
# define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
|
|
257
408
|
# define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
|
|
258
409
|
# define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
|
|
259
410
|
# define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
|
|
260
411
|
# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
|
|
261
412
|
# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
|
|
262
413
|
# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
|
|
414
|
+
# define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
|
|
263
415
|
# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
|
|
264
416
|
# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
|
|
265
417
|
# define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
|
|
418
|
+
# define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
|
|
266
419
|
/* XXH3_128bits */
|
|
267
420
|
# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
|
|
268
421
|
# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
|
|
269
422
|
# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
|
|
270
423
|
# define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
|
|
424
|
+
# define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
|
|
271
425
|
# define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
|
|
272
426
|
# define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
|
|
273
427
|
# define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
|
|
428
|
+
# define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
|
|
274
429
|
# define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
|
|
275
430
|
# define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
|
|
276
431
|
# define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
|
|
@@ -280,30 +435,64 @@ extern "C" {
|
|
|
280
435
|
#endif
|
|
281
436
|
|
|
282
437
|
|
|
438
|
+
/* *************************************
|
|
439
|
+
* Compiler specifics
|
|
440
|
+
***************************************/
|
|
441
|
+
|
|
442
|
+
/* specific declaration modes for Windows */
|
|
443
|
+
#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
|
|
444
|
+
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
|
|
445
|
+
# ifdef XXH_EXPORT
|
|
446
|
+
# define XXH_PUBLIC_API __declspec(dllexport)
|
|
447
|
+
# elif XXH_IMPORT
|
|
448
|
+
# define XXH_PUBLIC_API __declspec(dllimport)
|
|
449
|
+
# endif
|
|
450
|
+
# else
|
|
451
|
+
# define XXH_PUBLIC_API /* do nothing */
|
|
452
|
+
# endif
|
|
453
|
+
#endif
|
|
454
|
+
|
|
455
|
+
#if defined (__GNUC__)
|
|
456
|
+
# define XXH_CONSTF __attribute__((const))
|
|
457
|
+
# define XXH_PUREF __attribute__((pure))
|
|
458
|
+
# define XXH_MALLOCF __attribute__((malloc))
|
|
459
|
+
#else
|
|
460
|
+
# define XXH_CONSTF /* disable */
|
|
461
|
+
# define XXH_PUREF
|
|
462
|
+
# define XXH_MALLOCF
|
|
463
|
+
#endif
|
|
464
|
+
|
|
283
465
|
/* *************************************
|
|
284
466
|
* Version
|
|
285
467
|
***************************************/
|
|
286
468
|
#define XXH_VERSION_MAJOR 0
|
|
287
469
|
#define XXH_VERSION_MINOR 8
|
|
288
470
|
#define XXH_VERSION_RELEASE 1
|
|
471
|
+
/*! @brief Version number, encoded as two digits each */
|
|
289
472
|
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
|
|
290
473
|
|
|
291
474
|
/*!
|
|
292
475
|
* @brief Obtains the xxHash version.
|
|
293
476
|
*
|
|
294
|
-
* This is
|
|
295
|
-
*
|
|
477
|
+
* This is mostly useful when xxHash is compiled as a shared library,
|
|
478
|
+
* since the returned value comes from the library, as opposed to the header file.
|
|
296
479
|
*
|
|
297
|
-
* @return
|
|
480
|
+
* @return @ref XXH_VERSION_NUMBER of the invoked library.
|
|
298
481
|
*/
|
|
299
|
-
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
|
|
482
|
+
XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
|
|
300
483
|
|
|
301
484
|
|
|
302
485
|
/* ****************************
|
|
303
|
-
*
|
|
486
|
+
* Common basic types
|
|
304
487
|
******************************/
|
|
305
488
|
#include <stddef.h> /* size_t */
|
|
306
|
-
|
|
489
|
+
/*!
|
|
490
|
+
* @brief Exit code for the streaming API.
|
|
491
|
+
*/
|
|
492
|
+
typedef enum {
|
|
493
|
+
XXH_OK = 0, /*!< OK */
|
|
494
|
+
XXH_ERROR /*!< Error */
|
|
495
|
+
} XXH_errorcode;
|
|
307
496
|
|
|
308
497
|
|
|
309
498
|
/*-**********************************************************************
|
|
@@ -316,39 +505,38 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
|
|
|
316
505
|
* Not necessarily defined to `uint32_t` but functionally equivalent.
|
|
317
506
|
*/
|
|
318
507
|
typedef uint32_t XXH32_hash_t;
|
|
508
|
+
|
|
319
509
|
#elif !defined (__VMS) \
|
|
320
510
|
&& (defined (__cplusplus) \
|
|
321
511
|
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
|
322
512
|
# include <stdint.h>
|
|
323
513
|
typedef uint32_t XXH32_hash_t;
|
|
514
|
+
|
|
324
515
|
#else
|
|
325
516
|
# include <limits.h>
|
|
326
517
|
# if UINT_MAX == 0xFFFFFFFFUL
|
|
327
518
|
typedef unsigned int XXH32_hash_t;
|
|
519
|
+
# elif ULONG_MAX == 0xFFFFFFFFUL
|
|
520
|
+
typedef unsigned long XXH32_hash_t;
|
|
328
521
|
# else
|
|
329
|
-
#
|
|
330
|
-
typedef unsigned long XXH32_hash_t;
|
|
331
|
-
# else
|
|
332
|
-
# error "unsupported platform: need a 32-bit type"
|
|
333
|
-
# endif
|
|
522
|
+
# error "unsupported platform: need a 32-bit type"
|
|
334
523
|
# endif
|
|
335
524
|
#endif
|
|
336
525
|
|
|
337
526
|
/*!
|
|
338
527
|
* @}
|
|
339
528
|
*
|
|
340
|
-
* @defgroup
|
|
529
|
+
* @defgroup XXH32_family XXH32 family
|
|
341
530
|
* @ingroup public
|
|
342
531
|
* Contains functions used in the classic 32-bit xxHash algorithm.
|
|
343
532
|
*
|
|
344
533
|
* @note
|
|
345
|
-
* XXH32 is
|
|
346
|
-
*
|
|
347
|
-
* systems, and offers true 64/128 bit hash results.
|
|
348
|
-
* level of dispersion, and greatly reduces the risks of collisions.
|
|
534
|
+
* XXH32 is useful for older platforms, with no or poor 64-bit performance.
|
|
535
|
+
* Note that the @ref XXH3_family provides competitive speed for both 32-bit
|
|
536
|
+
* and 64-bit systems, and offers true 64/128 bit hash results.
|
|
349
537
|
*
|
|
350
|
-
* @see @ref
|
|
351
|
-
* @see @ref
|
|
538
|
+
* @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
|
|
539
|
+
* @see @ref XXH32_impl for implementation details
|
|
352
540
|
* @{
|
|
353
541
|
*/
|
|
354
542
|
|
|
@@ -357,6 +545,8 @@ typedef uint32_t XXH32_hash_t;
|
|
|
357
545
|
*
|
|
358
546
|
* Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
|
|
359
547
|
*
|
|
548
|
+
* See @ref single_shot_example "Single Shot Example" for an example.
|
|
549
|
+
*
|
|
360
550
|
* @param input The block of data to be hashed, at least @p length bytes in size.
|
|
361
551
|
* @param length The length of @p input, in bytes.
|
|
362
552
|
* @param seed The 32-bit seed to alter the hash's output predictably.
|
|
@@ -374,8 +564,9 @@ typedef uint32_t XXH32_hash_t;
|
|
|
374
564
|
* @see
|
|
375
565
|
* XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
|
|
376
566
|
*/
|
|
377
|
-
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
|
|
567
|
+
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
|
|
378
568
|
|
|
569
|
+
#ifndef XXH_NO_STREAM
|
|
379
570
|
/*!
|
|
380
571
|
* Streaming functions generate the xxHash value from an incremental input.
|
|
381
572
|
* This method is slower than single-call functions, due to state management.
|
|
@@ -398,32 +589,7 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_
|
|
|
398
589
|
*
|
|
399
590
|
* When done, release the state using `XXH*_freeState()`.
|
|
400
591
|
*
|
|
401
|
-
*
|
|
402
|
-
* @code{.c}
|
|
403
|
-
* #include <stdio.h>
|
|
404
|
-
* #include <xxhash.h>
|
|
405
|
-
* #define BUFFER_SIZE 256
|
|
406
|
-
*
|
|
407
|
-
* // Note: XXH64 and XXH3 use the same interface.
|
|
408
|
-
* XXH32_hash_t
|
|
409
|
-
* hashFile(FILE* stream)
|
|
410
|
-
* {
|
|
411
|
-
* XXH32_state_t* state;
|
|
412
|
-
* unsigned char buf[BUFFER_SIZE];
|
|
413
|
-
* size_t amt;
|
|
414
|
-
* XXH32_hash_t hash;
|
|
415
|
-
*
|
|
416
|
-
* state = XXH32_createState(); // Create a state
|
|
417
|
-
* assert(state != NULL); // Error check here
|
|
418
|
-
* XXH32_reset(state, 0xbaad5eed); // Reset state with our seed
|
|
419
|
-
* while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
|
|
420
|
-
* XXH32_update(state, buf, amt); // Hash the file in chunks
|
|
421
|
-
* }
|
|
422
|
-
* hash = XXH32_digest(state); // Finalize the hash
|
|
423
|
-
* XXH32_freeState(state); // Clean up
|
|
424
|
-
* return hash;
|
|
425
|
-
* }
|
|
426
|
-
* @endcode
|
|
592
|
+
* @see streaming_example at the top of @ref xxhash.h for an example.
|
|
427
593
|
*/
|
|
428
594
|
|
|
429
595
|
/*!
|
|
@@ -440,7 +606,7 @@ typedef struct XXH32_state_s XXH32_state_t;
|
|
|
440
606
|
* Must be freed with XXH32_freeState().
|
|
441
607
|
* @return An allocated XXH32_state_t on success, `NULL` on failure.
|
|
442
608
|
*/
|
|
443
|
-
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
|
|
609
|
+
XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
|
|
444
610
|
/*!
|
|
445
611
|
* @brief Frees an @ref XXH32_state_t.
|
|
446
612
|
*
|
|
@@ -508,7 +674,8 @@ XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void*
|
|
|
508
674
|
*
|
|
509
675
|
* @return The calculated xxHash32 value from that state.
|
|
510
676
|
*/
|
|
511
|
-
XXH_PUBLIC_API XXH32_hash_t
|
|
677
|
+
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
|
|
678
|
+
#endif /* !XXH_NO_STREAM */
|
|
512
679
|
|
|
513
680
|
/******* Canonical representation *******/
|
|
514
681
|
|
|
@@ -559,7 +726,52 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
|
|
|
559
726
|
*
|
|
560
727
|
* @return The converted hash.
|
|
561
728
|
*/
|
|
562
|
-
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
|
|
729
|
+
XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
|
|
730
|
+
|
|
731
|
+
|
|
732
|
+
#ifdef __has_attribute
|
|
733
|
+
# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
|
|
734
|
+
#else
|
|
735
|
+
# define XXH_HAS_ATTRIBUTE(x) 0
|
|
736
|
+
#endif
|
|
737
|
+
|
|
738
|
+
/* C-language Attributes are added in C23. */
|
|
739
|
+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
|
|
740
|
+
# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
|
|
741
|
+
#else
|
|
742
|
+
# define XXH_HAS_C_ATTRIBUTE(x) 0
|
|
743
|
+
#endif
|
|
744
|
+
|
|
745
|
+
#if defined(__cplusplus) && defined(__has_cpp_attribute)
|
|
746
|
+
# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
|
|
747
|
+
#else
|
|
748
|
+
# define XXH_HAS_CPP_ATTRIBUTE(x) 0
|
|
749
|
+
#endif
|
|
750
|
+
|
|
751
|
+
/*
|
|
752
|
+
* Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
|
|
753
|
+
* introduced in CPP17 and C23.
|
|
754
|
+
* CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
|
|
755
|
+
* C23 : https://en.cppreference.com/w/c/language/attributes/fallthrough
|
|
756
|
+
*/
|
|
757
|
+
#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
|
|
758
|
+
# define XXH_FALLTHROUGH [[fallthrough]]
|
|
759
|
+
#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
|
|
760
|
+
# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
|
|
761
|
+
#else
|
|
762
|
+
# define XXH_FALLTHROUGH /* fallthrough */
|
|
763
|
+
#endif
|
|
764
|
+
|
|
765
|
+
/*
|
|
766
|
+
* Define XXH_NOESCAPE for annotated pointers in public API.
|
|
767
|
+
* https://clang.llvm.org/docs/AttributeReference.html#noescape
|
|
768
|
+
* As of writing this, only supported by clang.
|
|
769
|
+
*/
|
|
770
|
+
#if XXH_HAS_ATTRIBUTE(noescape)
|
|
771
|
+
# define XXH_NOESCAPE __attribute__((noescape))
|
|
772
|
+
#else
|
|
773
|
+
# define XXH_NOESCAPE
|
|
774
|
+
#endif
|
|
563
775
|
|
|
564
776
|
|
|
565
777
|
/*!
|
|
@@ -598,18 +810,17 @@ typedef uint64_t XXH64_hash_t;
|
|
|
598
810
|
/*!
|
|
599
811
|
* @}
|
|
600
812
|
*
|
|
601
|
-
* @defgroup
|
|
813
|
+
* @defgroup XXH64_family XXH64 family
|
|
602
814
|
* @ingroup public
|
|
603
815
|
* @{
|
|
604
816
|
* Contains functions used in the classic 64-bit xxHash algorithm.
|
|
605
817
|
*
|
|
606
818
|
* @note
|
|
607
819
|
* XXH3 provides competitive speed for both 32-bit and 64-bit systems,
|
|
608
|
-
* and offers true 64/128 bit hash results.
|
|
609
|
-
*
|
|
820
|
+
* and offers true 64/128 bit hash results.
|
|
821
|
+
* It provides better speed for systems with vector processing capabilities.
|
|
610
822
|
*/
|
|
611
823
|
|
|
612
|
-
|
|
613
824
|
/*!
|
|
614
825
|
* @brief Calculates the 64-bit hash of @p input using xxHash64.
|
|
615
826
|
*
|
|
@@ -633,32 +844,35 @@ typedef uint64_t XXH64_hash_t;
|
|
|
633
844
|
* @see
|
|
634
845
|
* XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
|
|
635
846
|
*/
|
|
636
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
|
|
847
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
|
|
637
848
|
|
|
638
849
|
/******* Streaming *******/
|
|
850
|
+
#ifndef XXH_NO_STREAM
|
|
639
851
|
/*!
|
|
640
852
|
* @brief The opaque state struct for the XXH64 streaming API.
|
|
641
853
|
*
|
|
642
854
|
* @see XXH64_state_s for details.
|
|
643
855
|
*/
|
|
644
856
|
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
|
|
645
|
-
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
|
|
857
|
+
XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
|
|
646
858
|
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
|
|
647
|
-
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
|
|
648
|
-
|
|
649
|
-
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, XXH64_hash_t seed);
|
|
650
|
-
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
|
|
651
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
|
|
859
|
+
XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
|
|
652
860
|
|
|
861
|
+
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
|
|
862
|
+
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
|
|
863
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
|
|
864
|
+
#endif /* !XXH_NO_STREAM */
|
|
653
865
|
/******* Canonical representation *******/
|
|
654
866
|
typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
|
|
655
|
-
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
|
|
656
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
|
|
867
|
+
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
|
|
868
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
|
|
869
|
+
|
|
870
|
+
#ifndef XXH_NO_XXH3
|
|
657
871
|
|
|
658
872
|
/*!
|
|
659
873
|
* @}
|
|
660
874
|
* ************************************************************************
|
|
661
|
-
* @defgroup
|
|
875
|
+
* @defgroup XXH3_family XXH3 family
|
|
662
876
|
* @ingroup public
|
|
663
877
|
* @{
|
|
664
878
|
*
|
|
@@ -678,12 +892,14 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
|
678
892
|
*
|
|
679
893
|
* XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
|
|
680
894
|
* but does not require it.
|
|
681
|
-
*
|
|
682
|
-
*
|
|
683
|
-
*
|
|
895
|
+
* Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
|
|
896
|
+
* at competitive speeds, even without vector support. Further details are
|
|
897
|
+
* explained in the implementation.
|
|
684
898
|
*
|
|
685
899
|
* Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
|
|
686
|
-
* ZVector and scalar targets. This can be controlled via the XXH_VECTOR
|
|
900
|
+
* ZVector and scalar targets. This can be controlled via the @ref XXH_VECTOR
|
|
901
|
+
* macro. For the x86 family, an automatic dispatcher is included separately
|
|
902
|
+
* in @ref xxh_x86dispatch.c.
|
|
687
903
|
*
|
|
688
904
|
* XXH3 implementation is portable:
|
|
689
905
|
* it has a generic C90 formulation that can be compiled on any platform,
|
|
@@ -699,24 +915,42 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
|
699
915
|
*
|
|
700
916
|
* The API supports one-shot hashing, streaming mode, and custom secrets.
|
|
701
917
|
*/
|
|
702
|
-
|
|
703
918
|
/*-**********************************************************************
|
|
704
919
|
* XXH3 64-bit variant
|
|
705
920
|
************************************************************************/
|
|
706
921
|
|
|
707
|
-
|
|
708
|
-
*
|
|
709
|
-
*
|
|
710
|
-
|
|
922
|
+
/*!
|
|
923
|
+
* @brief 64-bit unseeded variant of XXH3.
|
|
924
|
+
*
|
|
925
|
+
* This is equivalent to @ref XXH3_64bits_withSeed() with a seed of 0, however
|
|
926
|
+
* it may have slightly better performance due to constant propagation of the
|
|
927
|
+
* defaults.
|
|
928
|
+
*
|
|
929
|
+
* @see
|
|
930
|
+
* XXH32(), XXH64(), XXH3_128bits(): equivalent for the other xxHash algorithms
|
|
931
|
+
* @see
|
|
932
|
+
* XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
|
|
933
|
+
* @see
|
|
934
|
+
* XXH3_64bits_reset(), XXH3_64bits_update(), XXH3_64bits_digest(): Streaming version.
|
|
935
|
+
*/
|
|
936
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
|
|
711
937
|
|
|
712
|
-
|
|
713
|
-
*
|
|
714
|
-
*
|
|
715
|
-
*
|
|
938
|
+
/*!
|
|
939
|
+
* @brief 64-bit seeded variant of XXH3
|
|
940
|
+
*
|
|
941
|
+
* This variant generates a custom secret on the fly based on default secret
|
|
942
|
+
* altered using the `seed` value.
|
|
943
|
+
*
|
|
716
944
|
* While this operation is decently fast, note that it's not completely free.
|
|
717
|
-
*
|
|
945
|
+
*
|
|
946
|
+
* @note
|
|
947
|
+
* seed == 0 produces the same results as @ref XXH3_64bits().
|
|
948
|
+
*
|
|
949
|
+
* @param input The data to hash
|
|
950
|
+
* @param length The length
|
|
951
|
+
* @param seed The 64-bit seed to alter the state.
|
|
718
952
|
*/
|
|
719
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void*
|
|
953
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
|
|
720
954
|
|
|
721
955
|
/*!
|
|
722
956
|
* The bare minimum size for a custom secret.
|
|
@@ -727,23 +961,29 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
|
|
|
727
961
|
*/
|
|
728
962
|
#define XXH3_SECRET_SIZE_MIN 136
|
|
729
963
|
|
|
730
|
-
|
|
731
|
-
*
|
|
964
|
+
/*!
|
|
965
|
+
* @brief 64-bit variant of XXH3 with a custom "secret".
|
|
966
|
+
*
|
|
732
967
|
* It's possible to provide any blob of bytes as a "secret" to generate the hash.
|
|
733
968
|
* This makes it more difficult for an external actor to prepare an intentional collision.
|
|
734
969
|
* The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
|
|
735
|
-
* However, the quality of
|
|
736
|
-
*
|
|
970
|
+
* However, the quality of the secret impacts the dispersion of the hash algorithm.
|
|
971
|
+
* Therefore, the secret _must_ look like a bunch of random bytes.
|
|
737
972
|
* Avoid "trivial" or structured data such as repeated sequences or a text document.
|
|
738
|
-
* Whenever
|
|
739
|
-
* consider
|
|
740
|
-
*
|
|
741
|
-
*
|
|
973
|
+
* Whenever in doubt about the "randomness" of the blob of bytes,
|
|
974
|
+
* consider employing "XXH3_generateSecret()" instead (see below).
|
|
975
|
+
* It will generate a proper high entropy secret derived from the blob of bytes.
|
|
976
|
+
* Another advantage of using XXH3_generateSecret() is that
|
|
977
|
+
* it guarantees that all bits within the initial blob of bytes
|
|
978
|
+
* will impact every bit of the output.
|
|
979
|
+
* This is not necessarily the case when using the blob of bytes directly
|
|
980
|
+
* because, when hashing _small_ inputs, only a portion of the secret is employed.
|
|
742
981
|
*/
|
|
743
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
|
|
982
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
|
|
744
983
|
|
|
745
984
|
|
|
746
985
|
/******* Streaming *******/
|
|
986
|
+
#ifndef XXH_NO_STREAM
|
|
747
987
|
/*
|
|
748
988
|
* Streaming requires state maintenance.
|
|
749
989
|
* This operation costs memory and CPU.
|
|
@@ -757,23 +997,23 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
|
|
|
757
997
|
* @see XXH3_state_s for details.
|
|
758
998
|
*/
|
|
759
999
|
typedef struct XXH3_state_s XXH3_state_t;
|
|
760
|
-
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
|
|
1000
|
+
XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
|
|
761
1001
|
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
|
|
762
|
-
XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
|
|
1002
|
+
XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
|
|
763
1003
|
|
|
764
1004
|
/*
|
|
765
1005
|
* XXH3_64bits_reset():
|
|
766
1006
|
* Initialize with default parameters.
|
|
767
1007
|
* digest will be equivalent to `XXH3_64bits()`.
|
|
768
1008
|
*/
|
|
769
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
|
|
1009
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
|
|
770
1010
|
/*
|
|
771
1011
|
* XXH3_64bits_reset_withSeed():
|
|
772
1012
|
* Generate a custom secret from `seed`, and store it into `statePtr`.
|
|
773
1013
|
* digest will be equivalent to `XXH3_64bits_withSeed()`.
|
|
774
1014
|
*/
|
|
775
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
|
|
776
|
-
|
|
1015
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
|
|
1016
|
+
/*!
|
|
777
1017
|
* XXH3_64bits_reset_withSecret():
|
|
778
1018
|
* `secret` is referenced, it _must outlive_ the hash streaming session.
|
|
779
1019
|
* Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
|
|
@@ -782,10 +1022,11 @@ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr,
|
|
|
782
1022
|
* When in doubt about the randomness of a candidate `secret`,
|
|
783
1023
|
* consider employing `XXH3_generateSecret()` instead (see below).
|
|
784
1024
|
*/
|
|
785
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
|
|
1025
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
|
|
786
1026
|
|
|
787
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
|
|
788
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
|
|
1027
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
|
|
1028
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
|
|
1029
|
+
#endif /* !XXH_NO_STREAM */
|
|
789
1030
|
|
|
790
1031
|
/* note : canonical representation of XXH3 is the same as XXH64
|
|
791
1032
|
* since they both produce XXH64_hash_t values */
|
|
@@ -806,11 +1047,31 @@ typedef struct {
|
|
|
806
1047
|
XXH64_hash_t high64; /*!< `value >> 64` */
|
|
807
1048
|
} XXH128_hash_t;
|
|
808
1049
|
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
1050
|
+
/*!
|
|
1051
|
+
* @brief Unseeded 128-bit variant of XXH3
|
|
1052
|
+
*
|
|
1053
|
+
* The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
|
|
1054
|
+
* for shorter inputs.
|
|
1055
|
+
*
|
|
1056
|
+
* This is equivalent to @ref XXH3_128bits_withSeed() with a seed of 0, however
|
|
1057
|
+
* it may have slightly better performance due to constant propagation of the
|
|
1058
|
+
* defaults.
|
|
1059
|
+
*
|
|
1060
|
+
* @see
|
|
1061
|
+
* XXH32(), XXH64(), XXH3_64bits(): equivalent for the other xxHash algorithms
|
|
1062
|
+
* @see
|
|
1063
|
+
* XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
|
|
1064
|
+
* @see
|
|
1065
|
+
* XXH3_128bits_reset(), XXH3_128bits_update(), XXH3_128bits_digest(): Streaming version.
|
|
1066
|
+
*/
|
|
1067
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
|
|
1068
|
+
/*! @brief Seeded 128-bit variant of XXH3. @see XXH3_64bits_withSeed(). */
|
|
1069
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
|
|
1070
|
+
/*! @brief Custom secret 128-bit variant of XXH3. @see XXH3_64bits_withSecret(). */
|
|
1071
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
|
|
812
1072
|
|
|
813
1073
|
/******* Streaming *******/
|
|
1074
|
+
#ifndef XXH_NO_STREAM
|
|
814
1075
|
/*
|
|
815
1076
|
* Streaming requires state maintenance.
|
|
816
1077
|
* This operation costs memory and CPU.
|
|
@@ -823,12 +1084,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t le
|
|
|
823
1084
|
* All reset and streaming functions have same meaning as their 64-bit counterpart.
|
|
824
1085
|
*/
|
|
825
1086
|
|
|
826
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
|
|
827
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
|
|
828
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
|
|
1087
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
|
|
1088
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
|
|
1089
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
|
|
829
1090
|
|
|
830
|
-
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
|
|
831
|
-
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
|
|
1091
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
|
|
1092
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
|
|
1093
|
+
#endif /* !XXH_NO_STREAM */
|
|
832
1094
|
|
|
833
1095
|
/* Following helper functions make it possible to compare XXH128_hash_t values.
|
|
834
1096
|
* Since XXH128_hash_t is a structure, this capability is not offered by the language.
|
|
@@ -838,26 +1100,26 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
|
|
|
838
1100
|
* XXH128_isEqual():
|
|
839
1101
|
* Return: 1 if `h1` and `h2` are equal, 0 if they are not.
|
|
840
1102
|
*/
|
|
841
|
-
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
|
|
1103
|
+
XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
|
|
842
1104
|
|
|
843
1105
|
/*!
|
|
844
|
-
*
|
|
845
|
-
*
|
|
1106
|
+
* @brief Compares two @ref XXH128_hash_t
|
|
846
1107
|
* This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
|
|
847
1108
|
*
|
|
848
|
-
* return: >0 if *h128_1 > *h128_2
|
|
849
|
-
*
|
|
850
|
-
*
|
|
1109
|
+
* @return: >0 if *h128_1 > *h128_2
|
|
1110
|
+
* =0 if *h128_1 == *h128_2
|
|
1111
|
+
* <0 if *h128_1 < *h128_2
|
|
851
1112
|
*/
|
|
852
|
-
XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
|
|
1113
|
+
XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
|
|
853
1114
|
|
|
854
1115
|
|
|
855
1116
|
/******* Canonical representation *******/
|
|
856
1117
|
typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
|
|
857
|
-
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
|
|
858
|
-
XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
|
|
1118
|
+
XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
|
|
1119
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
|
|
859
1120
|
|
|
860
1121
|
|
|
1122
|
+
#endif /* !XXH_NO_XXH3 */
|
|
861
1123
|
#endif /* XXH_NO_LONG_LONG */
|
|
862
1124
|
|
|
863
1125
|
/*!
|
|
@@ -898,13 +1160,10 @@ XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t*
|
|
|
898
1160
|
struct XXH32_state_s {
|
|
899
1161
|
XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
|
|
900
1162
|
XXH32_hash_t large_len; /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
|
|
901
|
-
XXH32_hash_t
|
|
902
|
-
XXH32_hash_t v2; /*!< Second accumulator lane */
|
|
903
|
-
XXH32_hash_t v3; /*!< Third accumulator lane */
|
|
904
|
-
XXH32_hash_t v4; /*!< Fourth accumulator lane */
|
|
1163
|
+
XXH32_hash_t v[4]; /*!< Accumulator lanes */
|
|
905
1164
|
XXH32_hash_t mem32[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[16]. */
|
|
906
1165
|
XXH32_hash_t memsize; /*!< Amount of data in @ref mem32 */
|
|
907
|
-
XXH32_hash_t reserved; /*!< Reserved field. Do not read
|
|
1166
|
+
XXH32_hash_t reserved; /*!< Reserved field. Do not read nor write to it. */
|
|
908
1167
|
}; /* typedef'd to XXH32_state_t */
|
|
909
1168
|
|
|
910
1169
|
|
|
@@ -924,19 +1183,21 @@ struct XXH32_state_s {
|
|
|
924
1183
|
*/
|
|
925
1184
|
struct XXH64_state_s {
|
|
926
1185
|
XXH64_hash_t total_len; /*!< Total length hashed. This is always 64-bit. */
|
|
927
|
-
XXH64_hash_t
|
|
928
|
-
XXH64_hash_t v2; /*!< Second accumulator lane */
|
|
929
|
-
XXH64_hash_t v3; /*!< Third accumulator lane */
|
|
930
|
-
XXH64_hash_t v4; /*!< Fourth accumulator lane */
|
|
1186
|
+
XXH64_hash_t v[4]; /*!< Accumulator lanes */
|
|
931
1187
|
XXH64_hash_t mem64[4]; /*!< Internal buffer for partial reads. Treated as unsigned char[32]. */
|
|
932
1188
|
XXH32_hash_t memsize; /*!< Amount of data in @ref mem64 */
|
|
933
1189
|
XXH32_hash_t reserved32; /*!< Reserved field, needed for padding anyways*/
|
|
934
|
-
XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it
|
|
1190
|
+
XXH64_hash_t reserved64; /*!< Reserved field. Do not read or write to it. */
|
|
935
1191
|
}; /* typedef'd to XXH64_state_t */
|
|
936
1192
|
|
|
937
|
-
#
|
|
1193
|
+
#ifndef XXH_NO_XXH3
|
|
1194
|
+
|
|
1195
|
+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
|
|
938
1196
|
# include <stdalign.h>
|
|
939
1197
|
# define XXH_ALIGN(n) alignas(n)
|
|
1198
|
+
#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
|
|
1199
|
+
/* In C++ alignas() is a keyword */
|
|
1200
|
+
# define XXH_ALIGN(n) alignas(n)
|
|
940
1201
|
#elif defined(__GNUC__)
|
|
941
1202
|
# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
|
|
942
1203
|
#elif defined(_MSC_VER)
|
|
@@ -947,6 +1208,7 @@ struct XXH64_state_s {
|
|
|
947
1208
|
|
|
948
1209
|
/* Old GCC versions only accept the attribute after the type in structures. */
|
|
949
1210
|
#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) /* C11+ */ \
|
|
1211
|
+
&& ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
|
|
950
1212
|
&& defined(__GNUC__)
|
|
951
1213
|
# define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
|
|
952
1214
|
#else
|
|
@@ -976,16 +1238,18 @@ struct XXH64_state_s {
|
|
|
976
1238
|
* @brief Structure for XXH3 streaming API.
|
|
977
1239
|
*
|
|
978
1240
|
* @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
|
|
979
|
-
* @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
|
|
980
|
-
* an opaque type.
|
|
1241
|
+
* @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
|
|
1242
|
+
* Otherwise it is an opaque type.
|
|
1243
|
+
* Never use this definition in combination with dynamic library.
|
|
1244
|
+
* This allows fields to safely be changed in the future.
|
|
981
1245
|
*
|
|
982
|
-
* @note **This structure has a strict alignment requirement of 64 bytes
|
|
983
|
-
* not allocate this with `malloc()` or `new`,
|
|
984
|
-
*
|
|
985
|
-
* allocation.
|
|
1246
|
+
* @note ** This structure has a strict alignment requirement of 64 bytes!! **
|
|
1247
|
+
* Do not allocate this with `malloc()` or `new`,
|
|
1248
|
+
* it will not be sufficiently aligned.
|
|
1249
|
+
* Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
|
|
986
1250
|
*
|
|
987
1251
|
* Typedef'd to @ref XXH3_state_t.
|
|
988
|
-
* Do
|
|
1252
|
+
* Never access the members of this struct directly.
|
|
989
1253
|
*
|
|
990
1254
|
* @see XXH3_INITSTATE() for stack initialization.
|
|
991
1255
|
* @see XXH3_createState(), XXH3_freeState().
|
|
@@ -993,14 +1257,14 @@ struct XXH64_state_s {
|
|
|
993
1257
|
*/
|
|
994
1258
|
struct XXH3_state_s {
|
|
995
1259
|
XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
|
|
996
|
-
/*!< The 8 accumulators.
|
|
1260
|
+
/*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
|
|
997
1261
|
XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
|
|
998
1262
|
/*!< Used to store a custom secret generated from a seed. */
|
|
999
1263
|
XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
|
|
1000
1264
|
/*!< The internal buffer. @see XXH32_state_s::mem32 */
|
|
1001
1265
|
XXH32_hash_t bufferedSize;
|
|
1002
1266
|
/*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
|
|
1003
|
-
XXH32_hash_t
|
|
1267
|
+
XXH32_hash_t useSeed;
|
|
1004
1268
|
/*!< Reserved field. Needed for padding on 64-bit. */
|
|
1005
1269
|
size_t nbStripesSoFar;
|
|
1006
1270
|
/*!< Number of stripes processed. */
|
|
@@ -1036,45 +1300,156 @@ struct XXH3_state_s {
|
|
|
1036
1300
|
#define XXH3_INITSTATE(XXH3_state_ptr) { (XXH3_state_ptr)->seed = 0; }
|
|
1037
1301
|
|
|
1038
1302
|
|
|
1303
|
+
/*!
|
|
1304
|
+
* simple alias to pre-selected XXH3_128bits variant
|
|
1305
|
+
*/
|
|
1306
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
|
|
1307
|
+
|
|
1308
|
+
|
|
1039
1309
|
/* === Experimental API === */
|
|
1040
1310
|
/* Symbols defined below must be considered tied to a specific library version. */
|
|
1041
1311
|
|
|
1042
|
-
|
|
1312
|
+
/*!
|
|
1043
1313
|
* XXH3_generateSecret():
|
|
1044
1314
|
*
|
|
1045
1315
|
* Derive a high-entropy secret from any user-defined content, named customSeed.
|
|
1046
1316
|
* The generated secret can be used in combination with `*_withSecret()` functions.
|
|
1047
|
-
* The `_withSecret()` variants are useful to provide a higher level of protection
|
|
1048
|
-
* as it becomes much more difficult for an external actor to
|
|
1317
|
+
* The `_withSecret()` variants are useful to provide a higher level of protection
|
|
1318
|
+
* than 64-bit seed, as it becomes much more difficult for an external actor to
|
|
1319
|
+
* guess how to impact the calculation logic.
|
|
1049
1320
|
*
|
|
1050
1321
|
* The function accepts as input a custom seed of any length and any content,
|
|
1051
|
-
* and derives from it a high-entropy secret of length
|
|
1052
|
-
*
|
|
1053
|
-
* The generated secret is _always_ XXH_SECRET_DEFAULT_SIZE bytes long.
|
|
1322
|
+
* and derives from it a high-entropy secret of length @p secretSize into an
|
|
1323
|
+
* already allocated buffer @p secretBuffer.
|
|
1054
1324
|
*
|
|
1055
1325
|
* The generated secret can then be used with any `*_withSecret()` variant.
|
|
1056
|
-
*
|
|
1057
|
-
*
|
|
1326
|
+
* The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
|
|
1327
|
+
* @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
|
|
1058
1328
|
* are part of this list. They all accept a `secret` parameter
|
|
1059
|
-
* which must be
|
|
1329
|
+
* which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
|
|
1060
1330
|
* _and_ feature very high entropy (consist of random-looking bytes).
|
|
1061
|
-
* These conditions can be a high bar to meet, so
|
|
1062
|
-
*
|
|
1331
|
+
* These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
|
|
1332
|
+
* be employed to ensure proper quality.
|
|
1063
1333
|
*
|
|
1064
|
-
* customSeed can be anything. It can have any size, even small ones,
|
|
1065
|
-
* and its content can be anything, even
|
|
1066
|
-
* The resulting `secret` will nonetheless provide all
|
|
1334
|
+
* @p customSeed can be anything. It can have any size, even small ones,
|
|
1335
|
+
* and its content can be anything, even "poor entropy" sources such as a bunch
|
|
1336
|
+
* of zeroes. The resulting `secret` will nonetheless provide all required qualities.
|
|
1337
|
+
*
|
|
1338
|
+
* @pre
|
|
1339
|
+
* - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
|
|
1340
|
+
* - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
|
|
1067
1341
|
*
|
|
1068
|
-
*
|
|
1069
|
-
*
|
|
1342
|
+
* Example code:
|
|
1343
|
+
* @code{.c}
|
|
1344
|
+
* #include <stdio.h>
|
|
1345
|
+
* #include <stdlib.h>
|
|
1346
|
+
* #include <string.h>
|
|
1347
|
+
* #define XXH_STATIC_LINKING_ONLY // expose unstable API
|
|
1348
|
+
* #include "xxhash.h"
|
|
1349
|
+
* // Hashes argv[2] using the entropy from argv[1].
|
|
1350
|
+
* int main(int argc, char* argv[])
|
|
1351
|
+
* {
|
|
1352
|
+
* char secret[XXH3_SECRET_SIZE_MIN];
|
|
1353
|
+
* if (argc != 3) { return 1; }
|
|
1354
|
+
* XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
|
|
1355
|
+
* XXH64_hash_t h = XXH3_64bits_withSecret(
|
|
1356
|
+
* argv[2], strlen(argv[2]),
|
|
1357
|
+
* secret, sizeof(secret)
|
|
1358
|
+
* );
|
|
1359
|
+
* printf("%016llx\n", (unsigned long long) h);
|
|
1360
|
+
* }
|
|
1361
|
+
* @endcode
|
|
1070
1362
|
*/
|
|
1071
|
-
XXH_PUBLIC_API
|
|
1072
|
-
|
|
1363
|
+
XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
|
|
1073
1364
|
|
|
1074
|
-
|
|
1075
|
-
|
|
1365
|
+
/*!
|
|
1366
|
+
* @brief Generate the same secret as the _withSeed() variants.
|
|
1367
|
+
*
|
|
1368
|
+
* The generated secret can be used in combination with
|
|
1369
|
+
*`*_withSecret()` and `_withSecretandSeed()` variants.
|
|
1370
|
+
*
|
|
1371
|
+
* Example C++ `std::string` hash class:
|
|
1372
|
+
* @code{.cpp}
|
|
1373
|
+
* #include <string>
|
|
1374
|
+
* #define XXH_STATIC_LINKING_ONLY // expose unstable API
|
|
1375
|
+
* #include "xxhash.h"
|
|
1376
|
+
* // Slow, seeds each time
|
|
1377
|
+
* class HashSlow {
|
|
1378
|
+
* XXH64_hash_t seed;
|
|
1379
|
+
* public:
|
|
1380
|
+
* HashSlow(XXH64_hash_t s) : seed{s} {}
|
|
1381
|
+
* size_t operator()(const std::string& x) const {
|
|
1382
|
+
* return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
|
|
1383
|
+
* }
|
|
1384
|
+
* };
|
|
1385
|
+
* // Fast, caches the seeded secret for future uses.
|
|
1386
|
+
* class HashFast {
|
|
1387
|
+
* unsigned char secret[XXH3_SECRET_SIZE_MIN];
|
|
1388
|
+
* public:
|
|
1389
|
+
* HashFast(XXH64_hash_t s) {
|
|
1390
|
+
* XXH3_generateSecret_fromSeed(secret, s);
|
|
1391
|
+
* }
|
|
1392
|
+
* size_t operator()(const std::string& x) const {
|
|
1393
|
+
* return size_t{
|
|
1394
|
+
* XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
|
|
1395
|
+
* };
|
|
1396
|
+
* }
|
|
1397
|
+
* };
|
|
1398
|
+
* @endcode
|
|
1399
|
+
* @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
|
|
1400
|
+
* @param seed The seed to seed the state.
|
|
1401
|
+
*/
|
|
1402
|
+
XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
|
|
1076
1403
|
|
|
1404
|
+
/*!
|
|
1405
|
+
* These variants generate hash values using either
|
|
1406
|
+
* @p seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
|
|
1407
|
+
* or @p secret for "large" keys (>= XXH3_MIDSIZE_MAX).
|
|
1408
|
+
*
|
|
1409
|
+
* This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
|
|
1410
|
+
* `_withSeed()` has to generate the secret on the fly for "large" keys.
|
|
1411
|
+
* It's fast, but can be perceptible for "not so large" keys (< 1 KB).
|
|
1412
|
+
* `_withSecret()` has to generate the masks on the fly for "small" keys,
|
|
1413
|
+
* which requires more instructions than _withSeed() variants.
|
|
1414
|
+
* Therefore, _withSecretandSeed variant combines the best of both worlds.
|
|
1415
|
+
*
|
|
1416
|
+
* When @p secret has been generated by XXH3_generateSecret_fromSeed(),
|
|
1417
|
+
* this variant produces *exactly* the same results as `_withSeed()` variant,
|
|
1418
|
+
* hence offering only a pure speed benefit on "large" input,
|
|
1419
|
+
* by skipping the need to regenerate the secret for every large input.
|
|
1420
|
+
*
|
|
1421
|
+
* Another usage scenario is to hash the secret to a 64-bit hash value,
|
|
1422
|
+
* for example with XXH3_64bits(), which then becomes the seed,
|
|
1423
|
+
* and then employ both the seed and the secret in _withSecretandSeed().
|
|
1424
|
+
* On top of speed, an added benefit is that each bit in the secret
|
|
1425
|
+
* has a 50% chance to swap each bit in the output, via its impact on the seed.
|
|
1426
|
+
*
|
|
1427
|
+
* This is not guaranteed when using the secret directly in "small data" scenarios,
|
|
1428
|
+
* because only portions of the secret are employed for small data.
|
|
1429
|
+
*/
|
|
1430
|
+
XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
|
|
1431
|
+
XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
|
|
1432
|
+
XXH_NOESCAPE const void* secret, size_t secretSize,
|
|
1433
|
+
XXH64_hash_t seed);
|
|
1434
|
+
/*! @copydoc XXH3_64bits_withSecretandSeed() */
|
|
1435
|
+
XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
|
|
1436
|
+
XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
|
|
1437
|
+
XXH_NOESCAPE const void* secret, size_t secretSize,
|
|
1438
|
+
XXH64_hash_t seed64);
|
|
1439
|
+
#ifndef XXH_NO_STREAM
|
|
1440
|
+
/*! @copydoc XXH3_64bits_withSecretandSeed() */
|
|
1441
|
+
XXH_PUBLIC_API XXH_errorcode
|
|
1442
|
+
XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
|
|
1443
|
+
XXH_NOESCAPE const void* secret, size_t secretSize,
|
|
1444
|
+
XXH64_hash_t seed64);
|
|
1445
|
+
/*! @copydoc XXH3_64bits_withSecretandSeed() */
|
|
1446
|
+
XXH_PUBLIC_API XXH_errorcode
|
|
1447
|
+
XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
|
|
1448
|
+
XXH_NOESCAPE const void* secret, size_t secretSize,
|
|
1449
|
+
XXH64_hash_t seed64);
|
|
1450
|
+
#endif /* !XXH_NO_STREAM */
|
|
1077
1451
|
|
|
1452
|
+
#endif /* !XXH_NO_XXH3 */
|
|
1078
1453
|
#endif /* XXH_NO_LONG_LONG */
|
|
1079
1454
|
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
|
|
1080
1455
|
# define XXH_IMPLEMENTATION
|
|
@@ -1128,7 +1503,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1128
1503
|
/*!
|
|
1129
1504
|
* @brief Define this to disable 64-bit code.
|
|
1130
1505
|
*
|
|
1131
|
-
* Useful if only using the @ref
|
|
1506
|
+
* Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
|
|
1132
1507
|
*/
|
|
1133
1508
|
# define XXH_NO_LONG_LONG
|
|
1134
1509
|
# undef XXH_NO_LONG_LONG /* don't actually */
|
|
@@ -1151,7 +1526,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1151
1526
|
* Use `memcpy()`. Safe and portable. Note that most modern compilers will
|
|
1152
1527
|
* eliminate the function call and treat it as an unaligned access.
|
|
1153
1528
|
*
|
|
1154
|
-
* - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((
|
|
1529
|
+
* - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
|
|
1155
1530
|
* @par
|
|
1156
1531
|
* Depends on compiler extensions and is therefore not portable.
|
|
1157
1532
|
* This method is safe _if_ your compiler supports it,
|
|
@@ -1178,22 +1553,40 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1178
1553
|
* care, as what works on one compiler/platform/optimization level may cause
|
|
1179
1554
|
* another to read garbage data or even crash.
|
|
1180
1555
|
*
|
|
1181
|
-
* See https://
|
|
1556
|
+
* See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
|
|
1182
1557
|
*
|
|
1183
1558
|
* Prefer these methods in priority order (0 > 3 > 1 > 2)
|
|
1184
1559
|
*/
|
|
1185
1560
|
# define XXH_FORCE_MEMORY_ACCESS 0
|
|
1561
|
+
|
|
1186
1562
|
/*!
|
|
1187
|
-
* @def
|
|
1188
|
-
* @brief
|
|
1563
|
+
* @def XXH_SIZE_OPT
|
|
1564
|
+
* @brief Controls how much xxHash optimizes for size.
|
|
1189
1565
|
*
|
|
1190
|
-
*
|
|
1191
|
-
*
|
|
1566
|
+
* xxHash, when compiled, tends to result in a rather large binary size. This
|
|
1567
|
+
* is mostly due to heavy usage of forced inlining and constant folding of the
|
|
1568
|
+
* @ref XXH3_family to increase performance.
|
|
1192
1569
|
*
|
|
1193
|
-
*
|
|
1194
|
-
*
|
|
1570
|
+
* However, some developers prefer size over speed. This option can
|
|
1571
|
+
* significantly reduce the size of the generated code. When using the `-Os`
|
|
1572
|
+
* or `-Oz` options on GCC or Clang, this is defined to 1 by default,
|
|
1573
|
+
* otherwise it is defined to 0.
|
|
1574
|
+
*
|
|
1575
|
+
* Most of these size optimizations can be controlled manually.
|
|
1576
|
+
*
|
|
1577
|
+
* This is a number from 0-2.
|
|
1578
|
+
* - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
|
|
1579
|
+
* comes first.
|
|
1580
|
+
* - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
|
|
1581
|
+
* conservative and disables hacks that increase code size. It implies the
|
|
1582
|
+
* options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
|
|
1583
|
+
* and @ref XXH3_NEON_LANES == 8 if they are not already defined.
|
|
1584
|
+
* - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
|
|
1585
|
+
* Performance may cry. For example, the single shot functions just use the
|
|
1586
|
+
* streaming API.
|
|
1195
1587
|
*/
|
|
1196
|
-
# define
|
|
1588
|
+
# define XXH_SIZE_OPT 0
|
|
1589
|
+
|
|
1197
1590
|
/*!
|
|
1198
1591
|
* @def XXH_FORCE_ALIGN_CHECK
|
|
1199
1592
|
* @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
|
|
@@ -1215,9 +1608,11 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1215
1608
|
*
|
|
1216
1609
|
* In these cases, the alignment check can be removed by setting this macro to 0.
|
|
1217
1610
|
* Then the code will always use unaligned memory access.
|
|
1218
|
-
* Align check is automatically disabled on x86, x64
|
|
1611
|
+
* Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
|
|
1219
1612
|
* which are platforms known to offer good unaligned memory accesses performance.
|
|
1220
1613
|
*
|
|
1614
|
+
* It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
|
|
1615
|
+
*
|
|
1221
1616
|
* This option does not affect XXH3 (only XXH32 and XXH64).
|
|
1222
1617
|
*/
|
|
1223
1618
|
# define XXH_FORCE_ALIGN_CHECK 0
|
|
@@ -1239,24 +1634,22 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1239
1634
|
* XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
|
|
1240
1635
|
* compiler full control on whether to inline or not.
|
|
1241
1636
|
*
|
|
1242
|
-
* When not optimizing (-O0),
|
|
1243
|
-
*
|
|
1637
|
+
* When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
|
|
1638
|
+
* @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
|
|
1244
1639
|
*/
|
|
1245
1640
|
# define XXH_NO_INLINE_HINTS 0
|
|
1246
1641
|
|
|
1247
1642
|
/*!
|
|
1248
|
-
* @def
|
|
1249
|
-
* @brief Whether to
|
|
1643
|
+
* @def XXH32_ENDJMP
|
|
1644
|
+
* @brief Whether to use a jump for `XXH32_finalize`.
|
|
1250
1645
|
*
|
|
1251
|
-
* For performance, `XXH32_finalize`
|
|
1252
|
-
*
|
|
1646
|
+
* For performance, `XXH32_finalize` uses multiple branches in the finalizer.
|
|
1647
|
+
* This is generally preferable for performance,
|
|
1648
|
+
* but depending on exact architecture, a jmp may be preferable.
|
|
1253
1649
|
*
|
|
1254
|
-
* This is
|
|
1255
|
-
* the architecture, may even be slower
|
|
1256
|
-
*
|
|
1257
|
-
* This is automatically defined with `-Os`/`-Oz` on GCC and Clang.
|
|
1650
|
+
* This setting is only likely to make a difference for very small inputs.
|
|
1258
1651
|
*/
|
|
1259
|
-
# define
|
|
1652
|
+
# define XXH32_ENDJMP 0
|
|
1260
1653
|
|
|
1261
1654
|
/*!
|
|
1262
1655
|
* @internal
|
|
@@ -1267,27 +1660,46 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1267
1660
|
*/
|
|
1268
1661
|
# define XXH_OLD_NAMES
|
|
1269
1662
|
# undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
|
|
1663
|
+
|
|
1664
|
+
/*!
|
|
1665
|
+
* @def XXH_NO_STREAM
|
|
1666
|
+
* @brief Disables the streaming API.
|
|
1667
|
+
*
|
|
1668
|
+
* When xxHash is not inlined and the streaming functions are not used, disabling
|
|
1669
|
+
* the streaming functions can improve code size significantly, especially with
|
|
1670
|
+
* the @ref XXH3_family which tends to make constant folded copies of itself.
|
|
1671
|
+
*/
|
|
1672
|
+
# define XXH_NO_STREAM
|
|
1673
|
+
# undef XXH_NO_STREAM /* don't actually */
|
|
1270
1674
|
#endif /* XXH_DOXYGEN */
|
|
1271
1675
|
/*!
|
|
1272
1676
|
* @}
|
|
1273
1677
|
*/
|
|
1274
1678
|
|
|
1275
1679
|
#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
|
|
1276
|
-
/* prefer __packed__ structures (method 1) for
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1680
|
+
/* prefer __packed__ structures (method 1) for GCC
|
|
1681
|
+
* < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
|
|
1682
|
+
* which for some reason does unaligned loads. */
|
|
1683
|
+
# if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
|
|
1280
1684
|
# define XXH_FORCE_MEMORY_ACCESS 1
|
|
1281
1685
|
# endif
|
|
1282
1686
|
#endif
|
|
1283
1687
|
|
|
1284
|
-
#ifndef
|
|
1285
|
-
|
|
1688
|
+
#ifndef XXH_SIZE_OPT
|
|
1689
|
+
/* default to 1 for -Os or -Oz */
|
|
1690
|
+
# if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
|
|
1691
|
+
# define XXH_SIZE_OPT 1
|
|
1692
|
+
# else
|
|
1693
|
+
# define XXH_SIZE_OPT 0
|
|
1694
|
+
# endif
|
|
1286
1695
|
#endif
|
|
1287
1696
|
|
|
1288
1697
|
#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
|
|
1289
|
-
|
|
1290
|
-
|
|
1698
|
+
/* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
|
|
1699
|
+
# if XXH_SIZE_OPT >= 1 || \
|
|
1700
|
+
defined(__i386) || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
|
|
1701
|
+
|| defined(_M_IX86) || defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM) \
|
|
1702
|
+
|| defined(__loongarch64) /* visual */
|
|
1291
1703
|
# define XXH_FORCE_ALIGN_CHECK 0
|
|
1292
1704
|
# else
|
|
1293
1705
|
# define XXH_FORCE_ALIGN_CHECK 1
|
|
@@ -1295,20 +1707,16 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1295
1707
|
#endif
|
|
1296
1708
|
|
|
1297
1709
|
#ifndef XXH_NO_INLINE_HINTS
|
|
1298
|
-
# if defined(
|
|
1299
|
-
|| defined(__NO_INLINE__) /* -O0, -fno-inline */
|
|
1710
|
+
# if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__) /* -O0, -fno-inline */
|
|
1300
1711
|
# define XXH_NO_INLINE_HINTS 1
|
|
1301
1712
|
# else
|
|
1302
1713
|
# define XXH_NO_INLINE_HINTS 0
|
|
1303
1714
|
# endif
|
|
1304
1715
|
#endif
|
|
1305
1716
|
|
|
1306
|
-
#ifndef
|
|
1307
|
-
|
|
1308
|
-
#
|
|
1309
|
-
# else
|
|
1310
|
-
# define XXH_REROLL 0
|
|
1311
|
-
# endif
|
|
1717
|
+
#ifndef XXH32_ENDJMP
|
|
1718
|
+
/* generally preferable for performance */
|
|
1719
|
+
# define XXH32_ENDJMP 0
|
|
1312
1720
|
#endif
|
|
1313
1721
|
|
|
1314
1722
|
/*!
|
|
@@ -1320,6 +1728,24 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1320
1728
|
/* *************************************
|
|
1321
1729
|
* Includes & Memory related functions
|
|
1322
1730
|
***************************************/
|
|
1731
|
+
#if defined(XXH_NO_STREAM)
|
|
1732
|
+
/* nothing */
|
|
1733
|
+
#elif defined(XXH_NO_STDLIB)
|
|
1734
|
+
|
|
1735
|
+
/* When requesting to disable any mention of stdlib,
|
|
1736
|
+
* the library loses the ability to invoke malloc / free.
|
|
1737
|
+
* In practice, it means that functions like `XXH*_createState()`
|
|
1738
|
+
* will always fail, and return NULL.
|
|
1739
|
+
* This flag is useful in situations where
|
|
1740
|
+
* xxhash.h is integrated into some kernel, embedded or limited environment
|
|
1741
|
+
* without access to dynamic allocation.
|
|
1742
|
+
*/
|
|
1743
|
+
|
|
1744
|
+
static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
|
|
1745
|
+
static void XXH_free(void* p) { (void)p; }
|
|
1746
|
+
|
|
1747
|
+
#else
|
|
1748
|
+
|
|
1323
1749
|
/*
|
|
1324
1750
|
* Modify the local functions below should you wish to use
|
|
1325
1751
|
* different memory routines for malloc() and free()
|
|
@@ -1330,7 +1756,7 @@ XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t s
|
|
|
1330
1756
|
* @internal
|
|
1331
1757
|
* @brief Modify this function to use a different routine than malloc().
|
|
1332
1758
|
*/
|
|
1333
|
-
static void* XXH_malloc(size_t s) { return malloc(s); }
|
|
1759
|
+
static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
|
|
1334
1760
|
|
|
1335
1761
|
/*!
|
|
1336
1762
|
* @internal
|
|
@@ -1338,6 +1764,8 @@ static void* XXH_malloc(size_t s) { return malloc(s); }
|
|
|
1338
1764
|
*/
|
|
1339
1765
|
static void XXH_free(void* p) { free(p); }
|
|
1340
1766
|
|
|
1767
|
+
#endif /* XXH_NO_STDLIB */
|
|
1768
|
+
|
|
1341
1769
|
#include <string.h>
|
|
1342
1770
|
|
|
1343
1771
|
/*!
|
|
@@ -1360,19 +1788,19 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
|
|
1360
1788
|
#endif
|
|
1361
1789
|
|
|
1362
1790
|
#if XXH_NO_INLINE_HINTS /* disable inlining hints */
|
|
1363
|
-
# if defined(__GNUC__)
|
|
1791
|
+
# if defined(__GNUC__) || defined(__clang__)
|
|
1364
1792
|
# define XXH_FORCE_INLINE static __attribute__((unused))
|
|
1365
1793
|
# else
|
|
1366
1794
|
# define XXH_FORCE_INLINE static
|
|
1367
1795
|
# endif
|
|
1368
1796
|
# define XXH_NO_INLINE static
|
|
1369
1797
|
/* enable inlining hints */
|
|
1798
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
|
1799
|
+
# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
|
|
1800
|
+
# define XXH_NO_INLINE static __attribute__((noinline))
|
|
1370
1801
|
#elif defined(_MSC_VER) /* Visual Studio */
|
|
1371
1802
|
# define XXH_FORCE_INLINE static __forceinline
|
|
1372
1803
|
# define XXH_NO_INLINE static __declspec(noinline)
|
|
1373
|
-
#elif defined(__GNUC__)
|
|
1374
|
-
# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
|
|
1375
|
-
# define XXH_NO_INLINE static __attribute__((noinline))
|
|
1376
1804
|
#elif defined (__cplusplus) \
|
|
1377
1805
|
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
|
|
1378
1806
|
# define XXH_FORCE_INLINE static inline
|
|
@@ -1407,11 +1835,20 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
|
|
1407
1835
|
# include <assert.h> /* note: can still be disabled with NDEBUG */
|
|
1408
1836
|
# define XXH_ASSERT(c) assert(c)
|
|
1409
1837
|
#else
|
|
1410
|
-
# define XXH_ASSERT(c) (
|
|
1838
|
+
# define XXH_ASSERT(c) XXH_ASSUME(c)
|
|
1411
1839
|
#endif
|
|
1412
1840
|
|
|
1413
1841
|
/* note: use after variable declarations */
|
|
1414
|
-
#
|
|
1842
|
+
#ifndef XXH_STATIC_ASSERT
|
|
1843
|
+
# if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11 */
|
|
1844
|
+
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
|
|
1845
|
+
# elif defined(__cplusplus) && (__cplusplus >= 201103L) /* C++11 */
|
|
1846
|
+
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
|
|
1847
|
+
# else
|
|
1848
|
+
# define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
|
|
1849
|
+
# endif
|
|
1850
|
+
# define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
|
|
1851
|
+
#endif
|
|
1415
1852
|
|
|
1416
1853
|
/*!
|
|
1417
1854
|
* @internal
|
|
@@ -1429,12 +1866,18 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
|
|
1429
1866
|
* We also use it to prevent unwanted constant folding for AArch64 in
|
|
1430
1867
|
* XXH3_initCustomSecret_scalar().
|
|
1431
1868
|
*/
|
|
1432
|
-
#
|
|
1869
|
+
#if defined(__GNUC__) || defined(__clang__)
|
|
1433
1870
|
# define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
|
|
1434
1871
|
#else
|
|
1435
1872
|
# define XXH_COMPILER_GUARD(var) ((void)0)
|
|
1436
1873
|
#endif
|
|
1437
1874
|
|
|
1875
|
+
#if defined(__GNUC__) || defined(__clang__)
|
|
1876
|
+
# define XXH_COMPILER_GUARD_W(var) __asm__ __volatile__("" : "+w" (var))
|
|
1877
|
+
#else
|
|
1878
|
+
# define XXH_COMPILER_GUARD_W(var) ((void)0)
|
|
1879
|
+
#endif
|
|
1880
|
+
|
|
1438
1881
|
/* *************************************
|
|
1439
1882
|
* Basic Types
|
|
1440
1883
|
***************************************/
|
|
@@ -1522,30 +1965,31 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
|
|
|
1522
1965
|
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
|
|
1523
1966
|
|
|
1524
1967
|
/*
|
|
1525
|
-
*
|
|
1526
|
-
*
|
|
1527
|
-
*
|
|
1528
|
-
*
|
|
1968
|
+
* __attribute__((aligned(1))) is supported by gcc and clang. Originally the
|
|
1969
|
+
* documentation claimed that it only increased the alignment, but actually it
|
|
1970
|
+
* can decrease it on gcc, clang, and icc:
|
|
1971
|
+
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
|
|
1972
|
+
* https://gcc.godbolt.org/z/xYez1j67Y.
|
|
1529
1973
|
*/
|
|
1530
1974
|
#ifdef XXH_OLD_NAMES
|
|
1531
1975
|
typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
|
|
1532
1976
|
#endif
|
|
1533
1977
|
static xxh_u32 XXH_read32(const void* ptr)
|
|
1534
1978
|
{
|
|
1535
|
-
typedef
|
|
1536
|
-
return ((const
|
|
1979
|
+
typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
|
|
1980
|
+
return *((const xxh_unalign32*)ptr);
|
|
1537
1981
|
}
|
|
1538
1982
|
|
|
1539
1983
|
#else
|
|
1540
1984
|
|
|
1541
1985
|
/*
|
|
1542
1986
|
* Portable and safe solution. Generally efficient.
|
|
1543
|
-
* see: https://
|
|
1987
|
+
* see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
|
|
1544
1988
|
*/
|
|
1545
1989
|
static xxh_u32 XXH_read32(const void* memPtr)
|
|
1546
1990
|
{
|
|
1547
1991
|
xxh_u32 val;
|
|
1548
|
-
|
|
1992
|
+
XXH_memcpy(&val, memPtr, sizeof(val));
|
|
1549
1993
|
return val;
|
|
1550
1994
|
}
|
|
1551
1995
|
|
|
@@ -1553,6 +1997,7 @@ static xxh_u32 XXH_read32(const void* memPtr)
|
|
|
1553
1997
|
|
|
1554
1998
|
|
|
1555
1999
|
/* *** Endianness *** */
|
|
2000
|
+
|
|
1556
2001
|
/*!
|
|
1557
2002
|
* @ingroup tuning
|
|
1558
2003
|
* @def XXH_CPU_LITTLE_ENDIAN
|
|
@@ -1561,8 +2006,8 @@ static xxh_u32 XXH_read32(const void* memPtr)
|
|
|
1561
2006
|
* Defined to 1 if the target is little endian, or 0 if it is big endian.
|
|
1562
2007
|
* It can be defined externally, for example on the compiler command line.
|
|
1563
2008
|
*
|
|
1564
|
-
* If it is not defined,
|
|
1565
|
-
* is used instead.
|
|
2009
|
+
* If it is not defined,
|
|
2010
|
+
* a runtime check (which is usually constant folded) is used instead.
|
|
1566
2011
|
*
|
|
1567
2012
|
* @note
|
|
1568
2013
|
* This is not necessarily defined to an integer constant.
|
|
@@ -1615,6 +2060,29 @@ static int XXH_isLittleEndian(void)
|
|
|
1615
2060
|
# define XXH_HAS_BUILTIN(x) 0
|
|
1616
2061
|
#endif
|
|
1617
2062
|
|
|
2063
|
+
|
|
2064
|
+
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L)
|
|
2065
|
+
/* C23 and future versions have standard "unreachable()" */
|
|
2066
|
+
# include <stddef.h>
|
|
2067
|
+
# define XXH_UNREACHABLE() unreachable()
|
|
2068
|
+
|
|
2069
|
+
#elif defined(__cplusplus) && (__cplusplus > 202002L)
|
|
2070
|
+
/* C++23 and future versions have std::unreachable() */
|
|
2071
|
+
# include <utility> /* std::unreachable() */
|
|
2072
|
+
# define XXH_UNREACHABLE() std::unreachable()
|
|
2073
|
+
|
|
2074
|
+
#elif XXH_HAS_BUILTIN(__builtin_unreachable)
|
|
2075
|
+
# define XXH_UNREACHABLE() __builtin_unreachable()
|
|
2076
|
+
|
|
2077
|
+
#elif defined(_MSC_VER)
|
|
2078
|
+
# define XXH_UNREACHABLE() __assume(0)
|
|
2079
|
+
|
|
2080
|
+
#else
|
|
2081
|
+
# define XXH_UNREACHABLE()
|
|
2082
|
+
#endif
|
|
2083
|
+
|
|
2084
|
+
#define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
|
|
2085
|
+
|
|
1618
2086
|
/*!
|
|
1619
2087
|
* @internal
|
|
1620
2088
|
* @def XXH_rotl32(x,r)
|
|
@@ -1737,8 +2205,10 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
|
|
|
1737
2205
|
*********************************************************************/
|
|
1738
2206
|
/*!
|
|
1739
2207
|
* @}
|
|
1740
|
-
* @defgroup
|
|
2208
|
+
* @defgroup XXH32_impl XXH32 implementation
|
|
1741
2209
|
* @ingroup impl
|
|
2210
|
+
*
|
|
2211
|
+
* Details on the XXH32 implementation.
|
|
1742
2212
|
* @{
|
|
1743
2213
|
*/
|
|
1744
2214
|
/* #define instead of static const, to be used as initializers */
|
|
@@ -1818,17 +2288,17 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
|
|
|
1818
2288
|
* The final mix ensures that all input bits have a chance to impact any bit in
|
|
1819
2289
|
* the output digest, resulting in an unbiased distribution.
|
|
1820
2290
|
*
|
|
1821
|
-
* @param
|
|
2291
|
+
* @param hash The hash to avalanche.
|
|
1822
2292
|
* @return The avalanched hash.
|
|
1823
2293
|
*/
|
|
1824
|
-
static xxh_u32 XXH32_avalanche(xxh_u32
|
|
2294
|
+
static xxh_u32 XXH32_avalanche(xxh_u32 hash)
|
|
1825
2295
|
{
|
|
1826
|
-
|
|
1827
|
-
|
|
1828
|
-
|
|
1829
|
-
|
|
1830
|
-
|
|
1831
|
-
return
|
|
2296
|
+
hash ^= hash >> 15;
|
|
2297
|
+
hash *= XXH_PRIME32_2;
|
|
2298
|
+
hash ^= hash >> 13;
|
|
2299
|
+
hash *= XXH_PRIME32_3;
|
|
2300
|
+
hash ^= hash >> 16;
|
|
2301
|
+
return hash;
|
|
1832
2302
|
}
|
|
1833
2303
|
|
|
1834
2304
|
#define XXH_get32bits(p) XXH_readLE32_align(p, align)
|
|
@@ -1841,28 +2311,31 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32)
|
|
|
1841
2311
|
* This final stage will digest them to ensure that all input bytes are present
|
|
1842
2312
|
* in the final mix.
|
|
1843
2313
|
*
|
|
1844
|
-
* @param
|
|
2314
|
+
* @param hash The hash to finalize.
|
|
1845
2315
|
* @param ptr The pointer to the remaining input.
|
|
1846
2316
|
* @param len The remaining length, modulo 16.
|
|
1847
2317
|
* @param align Whether @p ptr is aligned.
|
|
1848
2318
|
* @return The finalized hash.
|
|
2319
|
+
* @see XXH64_finalize().
|
|
1849
2320
|
*/
|
|
1850
|
-
static xxh_u32
|
|
1851
|
-
XXH32_finalize(xxh_u32
|
|
2321
|
+
static XXH_PUREF xxh_u32
|
|
2322
|
+
XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
1852
2323
|
{
|
|
1853
|
-
#define XXH_PROCESS1 do {
|
|
1854
|
-
|
|
1855
|
-
|
|
2324
|
+
#define XXH_PROCESS1 do { \
|
|
2325
|
+
hash += (*ptr++) * XXH_PRIME32_5; \
|
|
2326
|
+
hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1; \
|
|
1856
2327
|
} while (0)
|
|
1857
2328
|
|
|
1858
|
-
#define XXH_PROCESS4 do {
|
|
1859
|
-
|
|
1860
|
-
ptr += 4;
|
|
1861
|
-
|
|
2329
|
+
#define XXH_PROCESS4 do { \
|
|
2330
|
+
hash += XXH_get32bits(ptr) * XXH_PRIME32_3; \
|
|
2331
|
+
ptr += 4; \
|
|
2332
|
+
hash = XXH_rotl32(hash, 17) * XXH_PRIME32_4; \
|
|
1862
2333
|
} while (0)
|
|
1863
2334
|
|
|
1864
|
-
|
|
1865
|
-
|
|
2335
|
+
if (ptr==NULL) XXH_ASSERT(len == 0);
|
|
2336
|
+
|
|
2337
|
+
/* Compact rerolled version; generally faster */
|
|
2338
|
+
if (!XXH32_ENDJMP) {
|
|
1866
2339
|
len &= 15;
|
|
1867
2340
|
while (len >= 4) {
|
|
1868
2341
|
XXH_PROCESS4;
|
|
@@ -1872,49 +2345,49 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
|
1872
2345
|
XXH_PROCESS1;
|
|
1873
2346
|
--len;
|
|
1874
2347
|
}
|
|
1875
|
-
return XXH32_avalanche(
|
|
2348
|
+
return XXH32_avalanche(hash);
|
|
1876
2349
|
} else {
|
|
1877
2350
|
switch(len&15) /* or switch(bEnd - p) */ {
|
|
1878
2351
|
case 12: XXH_PROCESS4;
|
|
1879
|
-
|
|
2352
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1880
2353
|
case 8: XXH_PROCESS4;
|
|
1881
|
-
|
|
2354
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1882
2355
|
case 4: XXH_PROCESS4;
|
|
1883
|
-
return XXH32_avalanche(
|
|
2356
|
+
return XXH32_avalanche(hash);
|
|
1884
2357
|
|
|
1885
2358
|
case 13: XXH_PROCESS4;
|
|
1886
|
-
|
|
2359
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1887
2360
|
case 9: XXH_PROCESS4;
|
|
1888
|
-
|
|
2361
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1889
2362
|
case 5: XXH_PROCESS4;
|
|
1890
2363
|
XXH_PROCESS1;
|
|
1891
|
-
return XXH32_avalanche(
|
|
2364
|
+
return XXH32_avalanche(hash);
|
|
1892
2365
|
|
|
1893
2366
|
case 14: XXH_PROCESS4;
|
|
1894
|
-
|
|
2367
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1895
2368
|
case 10: XXH_PROCESS4;
|
|
1896
|
-
|
|
2369
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1897
2370
|
case 6: XXH_PROCESS4;
|
|
1898
2371
|
XXH_PROCESS1;
|
|
1899
2372
|
XXH_PROCESS1;
|
|
1900
|
-
return XXH32_avalanche(
|
|
2373
|
+
return XXH32_avalanche(hash);
|
|
1901
2374
|
|
|
1902
2375
|
case 15: XXH_PROCESS4;
|
|
1903
|
-
|
|
2376
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1904
2377
|
case 11: XXH_PROCESS4;
|
|
1905
|
-
|
|
2378
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1906
2379
|
case 7: XXH_PROCESS4;
|
|
1907
|
-
|
|
2380
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1908
2381
|
case 3: XXH_PROCESS1;
|
|
1909
|
-
|
|
2382
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1910
2383
|
case 2: XXH_PROCESS1;
|
|
1911
|
-
|
|
2384
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
1912
2385
|
case 1: XXH_PROCESS1;
|
|
1913
|
-
|
|
1914
|
-
case 0: return XXH32_avalanche(
|
|
2386
|
+
XXH_FALLTHROUGH; /* fallthrough */
|
|
2387
|
+
case 0: return XXH32_avalanche(hash);
|
|
1915
2388
|
}
|
|
1916
2389
|
XXH_ASSERT(0);
|
|
1917
|
-
return
|
|
2390
|
+
return hash; /* reaching this point is deemed impossible */
|
|
1918
2391
|
}
|
|
1919
2392
|
}
|
|
1920
2393
|
|
|
@@ -1930,24 +2403,19 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
|
1930
2403
|
* @internal
|
|
1931
2404
|
* @brief The implementation for @ref XXH32().
|
|
1932
2405
|
*
|
|
1933
|
-
* @param input, len, seed Directly passed from @ref XXH32().
|
|
2406
|
+
* @param input , len , seed Directly passed from @ref XXH32().
|
|
1934
2407
|
* @param align Whether @p input is aligned.
|
|
1935
2408
|
* @return The calculated hash.
|
|
1936
2409
|
*/
|
|
1937
|
-
XXH_FORCE_INLINE xxh_u32
|
|
2410
|
+
XXH_FORCE_INLINE XXH_PUREF xxh_u32
|
|
1938
2411
|
XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
|
|
1939
2412
|
{
|
|
1940
|
-
const xxh_u8* bEnd = input ? input + len : NULL;
|
|
1941
2413
|
xxh_u32 h32;
|
|
1942
2414
|
|
|
1943
|
-
|
|
1944
|
-
if (input==NULL) {
|
|
1945
|
-
len=0;
|
|
1946
|
-
bEnd=input=(const xxh_u8*)(size_t)16;
|
|
1947
|
-
}
|
|
1948
|
-
#endif
|
|
2415
|
+
if (input==NULL) XXH_ASSERT(len == 0);
|
|
1949
2416
|
|
|
1950
2417
|
if (len>=16) {
|
|
2418
|
+
const xxh_u8* const bEnd = input + len;
|
|
1951
2419
|
const xxh_u8* const limit = bEnd - 15;
|
|
1952
2420
|
xxh_u32 v1 = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
|
|
1953
2421
|
xxh_u32 v2 = seed + XXH_PRIME32_2;
|
|
@@ -1972,10 +2440,10 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
|
|
|
1972
2440
|
return XXH32_finalize(h32, input, len&15, align);
|
|
1973
2441
|
}
|
|
1974
2442
|
|
|
1975
|
-
/*! @ingroup
|
|
2443
|
+
/*! @ingroup XXH32_family */
|
|
1976
2444
|
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
|
|
1977
2445
|
{
|
|
1978
|
-
#if
|
|
2446
|
+
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
|
|
1979
2447
|
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
|
1980
2448
|
XXH32_state_t state;
|
|
1981
2449
|
XXH32_reset(&state, seed);
|
|
@@ -1994,51 +2462,46 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s
|
|
|
1994
2462
|
|
|
1995
2463
|
|
|
1996
2464
|
/******* Hash streaming *******/
|
|
1997
|
-
|
|
1998
|
-
|
|
1999
|
-
*/
|
|
2465
|
+
#ifndef XXH_NO_STREAM
|
|
2466
|
+
/*! @ingroup XXH32_family */
|
|
2000
2467
|
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
|
|
2001
2468
|
{
|
|
2002
2469
|
return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
|
|
2003
2470
|
}
|
|
2004
|
-
/*! @ingroup
|
|
2471
|
+
/*! @ingroup XXH32_family */
|
|
2005
2472
|
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
|
|
2006
2473
|
{
|
|
2007
2474
|
XXH_free(statePtr);
|
|
2008
2475
|
return XXH_OK;
|
|
2009
2476
|
}
|
|
2010
2477
|
|
|
2011
|
-
/*! @ingroup
|
|
2478
|
+
/*! @ingroup XXH32_family */
|
|
2012
2479
|
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
|
|
2013
2480
|
{
|
|
2014
|
-
|
|
2481
|
+
XXH_memcpy(dstState, srcState, sizeof(*dstState));
|
|
2015
2482
|
}
|
|
2016
2483
|
|
|
2017
|
-
/*! @ingroup
|
|
2484
|
+
/*! @ingroup XXH32_family */
|
|
2018
2485
|
XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
|
|
2019
2486
|
{
|
|
2020
|
-
|
|
2021
|
-
memset(
|
|
2022
|
-
|
|
2023
|
-
|
|
2024
|
-
|
|
2025
|
-
|
|
2026
|
-
/* do not write into reserved, planned to be removed in a future version */
|
|
2027
|
-
memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
|
|
2487
|
+
XXH_ASSERT(statePtr != NULL);
|
|
2488
|
+
memset(statePtr, 0, sizeof(*statePtr));
|
|
2489
|
+
statePtr->v[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
|
|
2490
|
+
statePtr->v[1] = seed + XXH_PRIME32_2;
|
|
2491
|
+
statePtr->v[2] = seed + 0;
|
|
2492
|
+
statePtr->v[3] = seed - XXH_PRIME32_1;
|
|
2028
2493
|
return XXH_OK;
|
|
2029
2494
|
}
|
|
2030
2495
|
|
|
2031
2496
|
|
|
2032
|
-
/*! @ingroup
|
|
2497
|
+
/*! @ingroup XXH32_family */
|
|
2033
2498
|
XXH_PUBLIC_API XXH_errorcode
|
|
2034
2499
|
XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
|
2035
2500
|
{
|
|
2036
|
-
if (input==NULL)
|
|
2037
|
-
|
|
2501
|
+
if (input==NULL) {
|
|
2502
|
+
XXH_ASSERT(len == 0);
|
|
2038
2503
|
return XXH_OK;
|
|
2039
|
-
|
|
2040
|
-
return XXH_ERROR;
|
|
2041
|
-
#endif
|
|
2504
|
+
}
|
|
2042
2505
|
|
|
2043
2506
|
{ const xxh_u8* p = (const xxh_u8*)input;
|
|
2044
2507
|
const xxh_u8* const bEnd = p + len;
|
|
@@ -2055,35 +2518,25 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
|
|
2055
2518
|
if (state->memsize) { /* some data left from previous update */
|
|
2056
2519
|
XXH_memcpy((xxh_u8*)(state->mem32) + state->memsize, input, 16-state->memsize);
|
|
2057
2520
|
{ const xxh_u32* p32 = state->mem32;
|
|
2058
|
-
state->
|
|
2059
|
-
state->
|
|
2060
|
-
state->
|
|
2061
|
-
state->
|
|
2521
|
+
state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p32)); p32++;
|
|
2522
|
+
state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p32)); p32++;
|
|
2523
|
+
state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p32)); p32++;
|
|
2524
|
+
state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p32));
|
|
2062
2525
|
}
|
|
2063
2526
|
p += 16-state->memsize;
|
|
2064
2527
|
state->memsize = 0;
|
|
2065
2528
|
}
|
|
2066
2529
|
|
|
2067
|
-
|
|
2068
|
-
|
|
2069
|
-
if ((uintptr_t)p <= (uintptr_t)bEnd - 16) {
|
|
2070
|
-
const uintptr_t limit = (uintptr_t)bEnd - 16;
|
|
2071
|
-
xxh_u32 v1 = state->v1;
|
|
2072
|
-
xxh_u32 v2 = state->v2;
|
|
2073
|
-
xxh_u32 v3 = state->v3;
|
|
2074
|
-
xxh_u32 v4 = state->v4;
|
|
2530
|
+
if (p <= bEnd-16) {
|
|
2531
|
+
const xxh_u8* const limit = bEnd - 16;
|
|
2075
2532
|
|
|
2076
2533
|
do {
|
|
2077
|
-
|
|
2078
|
-
|
|
2079
|
-
|
|
2080
|
-
|
|
2081
|
-
} while (
|
|
2082
|
-
|
|
2083
|
-
state->v1 = v1;
|
|
2084
|
-
state->v2 = v2;
|
|
2085
|
-
state->v3 = v3;
|
|
2086
|
-
state->v4 = v4;
|
|
2534
|
+
state->v[0] = XXH32_round(state->v[0], XXH_readLE32(p)); p+=4;
|
|
2535
|
+
state->v[1] = XXH32_round(state->v[1], XXH_readLE32(p)); p+=4;
|
|
2536
|
+
state->v[2] = XXH32_round(state->v[2], XXH_readLE32(p)); p+=4;
|
|
2537
|
+
state->v[3] = XXH32_round(state->v[3], XXH_readLE32(p)); p+=4;
|
|
2538
|
+
} while (p<=limit);
|
|
2539
|
+
|
|
2087
2540
|
}
|
|
2088
2541
|
|
|
2089
2542
|
if (p < bEnd) {
|
|
@@ -2096,30 +2549,30 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
|
|
|
2096
2549
|
}
|
|
2097
2550
|
|
|
2098
2551
|
|
|
2099
|
-
/*! @ingroup
|
|
2552
|
+
/*! @ingroup XXH32_family */
|
|
2100
2553
|
XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
|
|
2101
2554
|
{
|
|
2102
2555
|
xxh_u32 h32;
|
|
2103
2556
|
|
|
2104
2557
|
if (state->large_len) {
|
|
2105
|
-
h32 = XXH_rotl32(state->
|
|
2106
|
-
+ XXH_rotl32(state->
|
|
2107
|
-
+ XXH_rotl32(state->
|
|
2108
|
-
+ XXH_rotl32(state->
|
|
2558
|
+
h32 = XXH_rotl32(state->v[0], 1)
|
|
2559
|
+
+ XXH_rotl32(state->v[1], 7)
|
|
2560
|
+
+ XXH_rotl32(state->v[2], 12)
|
|
2561
|
+
+ XXH_rotl32(state->v[3], 18);
|
|
2109
2562
|
} else {
|
|
2110
|
-
h32 = state->
|
|
2563
|
+
h32 = state->v[2] /* == seed */ + XXH_PRIME32_5;
|
|
2111
2564
|
}
|
|
2112
2565
|
|
|
2113
2566
|
h32 += state->total_len_32;
|
|
2114
2567
|
|
|
2115
2568
|
return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
|
|
2116
2569
|
}
|
|
2117
|
-
|
|
2570
|
+
#endif /* !XXH_NO_STREAM */
|
|
2118
2571
|
|
|
2119
2572
|
/******* Canonical representation *******/
|
|
2120
2573
|
|
|
2121
2574
|
/*!
|
|
2122
|
-
* @ingroup
|
|
2575
|
+
* @ingroup XXH32_family
|
|
2123
2576
|
* The default return values from XXH functions are unsigned 32 and 64 bit
|
|
2124
2577
|
* integers.
|
|
2125
2578
|
*
|
|
@@ -2136,9 +2589,9 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
|
|
|
2136
2589
|
{
|
|
2137
2590
|
XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
|
|
2138
2591
|
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
|
|
2139
|
-
|
|
2592
|
+
XXH_memcpy(dst, &hash, sizeof(*dst));
|
|
2140
2593
|
}
|
|
2141
|
-
/*! @ingroup
|
|
2594
|
+
/*! @ingroup XXH32_family */
|
|
2142
2595
|
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
|
|
2143
2596
|
{
|
|
2144
2597
|
return XXH_readBE32(src);
|
|
@@ -2179,30 +2632,31 @@ static xxh_u64 XXH_read64(const void* memPtr)
|
|
|
2179
2632
|
#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
|
|
2180
2633
|
|
|
2181
2634
|
/*
|
|
2182
|
-
*
|
|
2183
|
-
*
|
|
2184
|
-
*
|
|
2185
|
-
*
|
|
2635
|
+
* __attribute__((aligned(1))) is supported by gcc and clang. Originally the
|
|
2636
|
+
* documentation claimed that it only increased the alignment, but actually it
|
|
2637
|
+
* can decrease it on gcc, clang, and icc:
|
|
2638
|
+
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
|
|
2639
|
+
* https://gcc.godbolt.org/z/xYez1j67Y.
|
|
2186
2640
|
*/
|
|
2187
2641
|
#ifdef XXH_OLD_NAMES
|
|
2188
2642
|
typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
|
|
2189
2643
|
#endif
|
|
2190
2644
|
static xxh_u64 XXH_read64(const void* ptr)
|
|
2191
2645
|
{
|
|
2192
|
-
typedef
|
|
2193
|
-
return ((const xxh_unalign64*)ptr)
|
|
2646
|
+
typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
|
|
2647
|
+
return *((const xxh_unalign64*)ptr);
|
|
2194
2648
|
}
|
|
2195
2649
|
|
|
2196
2650
|
#else
|
|
2197
2651
|
|
|
2198
2652
|
/*
|
|
2199
2653
|
* Portable and safe solution. Generally efficient.
|
|
2200
|
-
* see: https://
|
|
2654
|
+
* see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
|
|
2201
2655
|
*/
|
|
2202
2656
|
static xxh_u64 XXH_read64(const void* memPtr)
|
|
2203
2657
|
{
|
|
2204
2658
|
xxh_u64 val;
|
|
2205
|
-
|
|
2659
|
+
XXH_memcpy(&val, memPtr, sizeof(val));
|
|
2206
2660
|
return val;
|
|
2207
2661
|
}
|
|
2208
2662
|
|
|
@@ -2281,8 +2735,10 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
|
|
|
2281
2735
|
/******* xxh64 *******/
|
|
2282
2736
|
/*!
|
|
2283
2737
|
* @}
|
|
2284
|
-
* @defgroup
|
|
2738
|
+
* @defgroup XXH64_impl XXH64 implementation
|
|
2285
2739
|
* @ingroup impl
|
|
2740
|
+
*
|
|
2741
|
+
* Details on the XXH64 implementation.
|
|
2286
2742
|
* @{
|
|
2287
2743
|
*/
|
|
2288
2744
|
/* #define rather that static const, to be used as initializers */
|
|
@@ -2300,6 +2756,7 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
|
|
|
2300
2756
|
# define PRIME64_5 XXH_PRIME64_5
|
|
2301
2757
|
#endif
|
|
2302
2758
|
|
|
2759
|
+
/*! @copydoc XXH32_round */
|
|
2303
2760
|
static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
|
|
2304
2761
|
{
|
|
2305
2762
|
acc += input * XXH_PRIME64_2;
|
|
@@ -2316,42 +2773,59 @@ static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
|
|
|
2316
2773
|
return acc;
|
|
2317
2774
|
}
|
|
2318
2775
|
|
|
2319
|
-
|
|
2776
|
+
/*! @copydoc XXH32_avalanche */
|
|
2777
|
+
static xxh_u64 XXH64_avalanche(xxh_u64 hash)
|
|
2320
2778
|
{
|
|
2321
|
-
|
|
2322
|
-
|
|
2323
|
-
|
|
2324
|
-
|
|
2325
|
-
|
|
2326
|
-
return
|
|
2779
|
+
hash ^= hash >> 33;
|
|
2780
|
+
hash *= XXH_PRIME64_2;
|
|
2781
|
+
hash ^= hash >> 29;
|
|
2782
|
+
hash *= XXH_PRIME64_3;
|
|
2783
|
+
hash ^= hash >> 32;
|
|
2784
|
+
return hash;
|
|
2327
2785
|
}
|
|
2328
2786
|
|
|
2329
2787
|
|
|
2330
2788
|
#define XXH_get64bits(p) XXH_readLE64_align(p, align)
|
|
2331
2789
|
|
|
2332
|
-
|
|
2333
|
-
|
|
2334
|
-
|
|
2335
|
-
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2339
|
-
|
|
2340
|
-
|
|
2790
|
+
/*!
|
|
2791
|
+
* @internal
|
|
2792
|
+
* @brief Processes the last 0-31 bytes of @p ptr.
|
|
2793
|
+
*
|
|
2794
|
+
* There may be up to 31 bytes remaining to consume from the input.
|
|
2795
|
+
* This final stage will digest them to ensure that all input bytes are present
|
|
2796
|
+
* in the final mix.
|
|
2797
|
+
*
|
|
2798
|
+
* @param hash The hash to finalize.
|
|
2799
|
+
* @param ptr The pointer to the remaining input.
|
|
2800
|
+
* @param len The remaining length, modulo 32.
|
|
2801
|
+
* @param align Whether @p ptr is aligned.
|
|
2802
|
+
* @return The finalized hash
|
|
2803
|
+
* @see XXH32_finalize().
|
|
2804
|
+
*/
|
|
2805
|
+
static XXH_PUREF xxh_u64
|
|
2806
|
+
XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
2807
|
+
{
|
|
2808
|
+
if (ptr==NULL) XXH_ASSERT(len == 0);
|
|
2809
|
+
len &= 31;
|
|
2810
|
+
while (len >= 8) {
|
|
2811
|
+
xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
|
|
2812
|
+
ptr += 8;
|
|
2813
|
+
hash ^= k1;
|
|
2814
|
+
hash = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
|
|
2341
2815
|
len -= 8;
|
|
2342
2816
|
}
|
|
2343
2817
|
if (len >= 4) {
|
|
2344
|
-
|
|
2818
|
+
hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
|
|
2345
2819
|
ptr += 4;
|
|
2346
|
-
|
|
2820
|
+
hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
|
|
2347
2821
|
len -= 4;
|
|
2348
2822
|
}
|
|
2349
2823
|
while (len > 0) {
|
|
2350
|
-
|
|
2351
|
-
|
|
2824
|
+
hash ^= (*ptr++) * XXH_PRIME64_5;
|
|
2825
|
+
hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
|
|
2352
2826
|
--len;
|
|
2353
2827
|
}
|
|
2354
|
-
return XXH64_avalanche(
|
|
2828
|
+
return XXH64_avalanche(hash);
|
|
2355
2829
|
}
|
|
2356
2830
|
|
|
2357
2831
|
#ifdef XXH_OLD_NAMES
|
|
@@ -2364,21 +2838,23 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
|
|
|
2364
2838
|
# undef XXH_PROCESS8_64
|
|
2365
2839
|
#endif
|
|
2366
2840
|
|
|
2367
|
-
|
|
2841
|
+
/*!
|
|
2842
|
+
* @internal
|
|
2843
|
+
* @brief The implementation for @ref XXH64().
|
|
2844
|
+
*
|
|
2845
|
+
* @param input , len , seed Directly passed from @ref XXH64().
|
|
2846
|
+
* @param align Whether @p input is aligned.
|
|
2847
|
+
* @return The calculated hash.
|
|
2848
|
+
*/
|
|
2849
|
+
XXH_FORCE_INLINE XXH_PUREF xxh_u64
|
|
2368
2850
|
XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
|
|
2369
2851
|
{
|
|
2370
|
-
const xxh_u8* bEnd = input ? input + len : NULL;
|
|
2371
2852
|
xxh_u64 h64;
|
|
2372
|
-
|
|
2373
|
-
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
|
|
2374
|
-
if (input==NULL) {
|
|
2375
|
-
len=0;
|
|
2376
|
-
bEnd=input=(const xxh_u8*)(size_t)32;
|
|
2377
|
-
}
|
|
2378
|
-
#endif
|
|
2853
|
+
if (input==NULL) XXH_ASSERT(len == 0);
|
|
2379
2854
|
|
|
2380
2855
|
if (len>=32) {
|
|
2381
|
-
const xxh_u8* const
|
|
2856
|
+
const xxh_u8* const bEnd = input + len;
|
|
2857
|
+
const xxh_u8* const limit = bEnd - 31;
|
|
2382
2858
|
xxh_u64 v1 = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
|
|
2383
2859
|
xxh_u64 v2 = seed + XXH_PRIME64_2;
|
|
2384
2860
|
xxh_u64 v3 = seed + 0;
|
|
@@ -2389,7 +2865,7 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
|
|
|
2389
2865
|
v2 = XXH64_round(v2, XXH_get64bits(input)); input+=8;
|
|
2390
2866
|
v3 = XXH64_round(v3, XXH_get64bits(input)); input+=8;
|
|
2391
2867
|
v4 = XXH64_round(v4, XXH_get64bits(input)); input+=8;
|
|
2392
|
-
} while (input
|
|
2868
|
+
} while (input<limit);
|
|
2393
2869
|
|
|
2394
2870
|
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
|
2395
2871
|
h64 = XXH64_mergeRound(h64, v1);
|
|
@@ -2407,10 +2883,10 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
|
|
|
2407
2883
|
}
|
|
2408
2884
|
|
|
2409
2885
|
|
|
2410
|
-
/*! @ingroup
|
|
2411
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
|
|
2886
|
+
/*! @ingroup XXH64_family */
|
|
2887
|
+
XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
|
|
2412
2888
|
{
|
|
2413
|
-
#if
|
|
2889
|
+
#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
|
|
2414
2890
|
/* Simple version, good for code maintenance, but unfortunately slow for small inputs */
|
|
2415
2891
|
XXH64_state_t state;
|
|
2416
2892
|
XXH64_reset(&state, seed);
|
|
@@ -2428,49 +2904,45 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
|
|
|
2428
2904
|
}
|
|
2429
2905
|
|
|
2430
2906
|
/******* Hash Streaming *******/
|
|
2431
|
-
|
|
2432
|
-
/*! @ingroup
|
|
2907
|
+
#ifndef XXH_NO_STREAM
|
|
2908
|
+
/*! @ingroup XXH64_family*/
|
|
2433
2909
|
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
|
|
2434
2910
|
{
|
|
2435
2911
|
return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
|
|
2436
2912
|
}
|
|
2437
|
-
/*! @ingroup
|
|
2913
|
+
/*! @ingroup XXH64_family */
|
|
2438
2914
|
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
|
|
2439
2915
|
{
|
|
2440
2916
|
XXH_free(statePtr);
|
|
2441
2917
|
return XXH_OK;
|
|
2442
2918
|
}
|
|
2443
2919
|
|
|
2444
|
-
/*! @ingroup
|
|
2445
|
-
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
|
|
2920
|
+
/*! @ingroup XXH64_family */
|
|
2921
|
+
XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
|
|
2446
2922
|
{
|
|
2447
|
-
|
|
2923
|
+
XXH_memcpy(dstState, srcState, sizeof(*dstState));
|
|
2448
2924
|
}
|
|
2449
2925
|
|
|
2450
|
-
/*! @ingroup
|
|
2451
|
-
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
|
|
2926
|
+
/*! @ingroup XXH64_family */
|
|
2927
|
+
XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
|
|
2452
2928
|
{
|
|
2453
|
-
|
|
2454
|
-
memset(
|
|
2455
|
-
|
|
2456
|
-
|
|
2457
|
-
|
|
2458
|
-
|
|
2459
|
-
/* do not write into reserved64, might be removed in a future version */
|
|
2460
|
-
memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved64));
|
|
2929
|
+
XXH_ASSERT(statePtr != NULL);
|
|
2930
|
+
memset(statePtr, 0, sizeof(*statePtr));
|
|
2931
|
+
statePtr->v[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
|
|
2932
|
+
statePtr->v[1] = seed + XXH_PRIME64_2;
|
|
2933
|
+
statePtr->v[2] = seed + 0;
|
|
2934
|
+
statePtr->v[3] = seed - XXH_PRIME64_1;
|
|
2461
2935
|
return XXH_OK;
|
|
2462
2936
|
}
|
|
2463
2937
|
|
|
2464
|
-
/*! @ingroup
|
|
2938
|
+
/*! @ingroup XXH64_family */
|
|
2465
2939
|
XXH_PUBLIC_API XXH_errorcode
|
|
2466
|
-
XXH64_update (XXH64_state_t* state, const void* input, size_t len)
|
|
2940
|
+
XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
|
|
2467
2941
|
{
|
|
2468
|
-
if (input==NULL)
|
|
2469
|
-
|
|
2942
|
+
if (input==NULL) {
|
|
2943
|
+
XXH_ASSERT(len == 0);
|
|
2470
2944
|
return XXH_OK;
|
|
2471
|
-
|
|
2472
|
-
return XXH_ERROR;
|
|
2473
|
-
#endif
|
|
2945
|
+
}
|
|
2474
2946
|
|
|
2475
2947
|
{ const xxh_u8* p = (const xxh_u8*)input;
|
|
2476
2948
|
const xxh_u8* const bEnd = p + len;
|
|
@@ -2485,34 +2957,24 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
|
|
|
2485
2957
|
|
|
2486
2958
|
if (state->memsize) { /* tmp buffer is full */
|
|
2487
2959
|
XXH_memcpy(((xxh_u8*)state->mem64) + state->memsize, input, 32-state->memsize);
|
|
2488
|
-
state->
|
|
2489
|
-
state->
|
|
2490
|
-
state->
|
|
2491
|
-
state->
|
|
2960
|
+
state->v[0] = XXH64_round(state->v[0], XXH_readLE64(state->mem64+0));
|
|
2961
|
+
state->v[1] = XXH64_round(state->v[1], XXH_readLE64(state->mem64+1));
|
|
2962
|
+
state->v[2] = XXH64_round(state->v[2], XXH_readLE64(state->mem64+2));
|
|
2963
|
+
state->v[3] = XXH64_round(state->v[3], XXH_readLE64(state->mem64+3));
|
|
2492
2964
|
p += 32 - state->memsize;
|
|
2493
2965
|
state->memsize = 0;
|
|
2494
2966
|
}
|
|
2495
2967
|
|
|
2496
|
-
|
|
2497
|
-
|
|
2498
|
-
if ((uintptr_t)p + 32 <= (uintptr_t)bEnd) {
|
|
2499
|
-
const uintptr_t limit = (uintptr_t)bEnd - 32;
|
|
2500
|
-
xxh_u64 v1 = state->v1;
|
|
2501
|
-
xxh_u64 v2 = state->v2;
|
|
2502
|
-
xxh_u64 v3 = state->v3;
|
|
2503
|
-
xxh_u64 v4 = state->v4;
|
|
2968
|
+
if (p+32 <= bEnd) {
|
|
2969
|
+
const xxh_u8* const limit = bEnd - 32;
|
|
2504
2970
|
|
|
2505
2971
|
do {
|
|
2506
|
-
|
|
2507
|
-
|
|
2508
|
-
|
|
2509
|
-
|
|
2510
|
-
} while (
|
|
2511
|
-
|
|
2512
|
-
state->v1 = v1;
|
|
2513
|
-
state->v2 = v2;
|
|
2514
|
-
state->v3 = v3;
|
|
2515
|
-
state->v4 = v4;
|
|
2972
|
+
state->v[0] = XXH64_round(state->v[0], XXH_readLE64(p)); p+=8;
|
|
2973
|
+
state->v[1] = XXH64_round(state->v[1], XXH_readLE64(p)); p+=8;
|
|
2974
|
+
state->v[2] = XXH64_round(state->v[2], XXH_readLE64(p)); p+=8;
|
|
2975
|
+
state->v[3] = XXH64_round(state->v[3], XXH_readLE64(p)); p+=8;
|
|
2976
|
+
} while (p<=limit);
|
|
2977
|
+
|
|
2516
2978
|
}
|
|
2517
2979
|
|
|
2518
2980
|
if (p < bEnd) {
|
|
@@ -2525,44 +2987,39 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
|
|
|
2525
2987
|
}
|
|
2526
2988
|
|
|
2527
2989
|
|
|
2528
|
-
/*! @ingroup
|
|
2529
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
|
|
2990
|
+
/*! @ingroup XXH64_family */
|
|
2991
|
+
XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
|
|
2530
2992
|
{
|
|
2531
2993
|
xxh_u64 h64;
|
|
2532
2994
|
|
|
2533
2995
|
if (state->total_len >= 32) {
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
|
|
2540
|
-
h64 = XXH64_mergeRound(h64, v1);
|
|
2541
|
-
h64 = XXH64_mergeRound(h64, v2);
|
|
2542
|
-
h64 = XXH64_mergeRound(h64, v3);
|
|
2543
|
-
h64 = XXH64_mergeRound(h64, v4);
|
|
2996
|
+
h64 = XXH_rotl64(state->v[0], 1) + XXH_rotl64(state->v[1], 7) + XXH_rotl64(state->v[2], 12) + XXH_rotl64(state->v[3], 18);
|
|
2997
|
+
h64 = XXH64_mergeRound(h64, state->v[0]);
|
|
2998
|
+
h64 = XXH64_mergeRound(h64, state->v[1]);
|
|
2999
|
+
h64 = XXH64_mergeRound(h64, state->v[2]);
|
|
3000
|
+
h64 = XXH64_mergeRound(h64, state->v[3]);
|
|
2544
3001
|
} else {
|
|
2545
|
-
h64 = state->
|
|
3002
|
+
h64 = state->v[2] /*seed*/ + XXH_PRIME64_5;
|
|
2546
3003
|
}
|
|
2547
3004
|
|
|
2548
3005
|
h64 += (xxh_u64) state->total_len;
|
|
2549
3006
|
|
|
2550
3007
|
return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
|
|
2551
3008
|
}
|
|
2552
|
-
|
|
3009
|
+
#endif /* !XXH_NO_STREAM */
|
|
2553
3010
|
|
|
2554
3011
|
/******* Canonical representation *******/
|
|
2555
3012
|
|
|
2556
|
-
/*! @ingroup
|
|
2557
|
-
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
|
|
3013
|
+
/*! @ingroup XXH64_family */
|
|
3014
|
+
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
|
|
2558
3015
|
{
|
|
2559
3016
|
XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
|
|
2560
3017
|
if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
|
|
2561
|
-
|
|
3018
|
+
XXH_memcpy(dst, &hash, sizeof(*dst));
|
|
2562
3019
|
}
|
|
2563
3020
|
|
|
2564
|
-
/*! @ingroup
|
|
2565
|
-
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
|
|
3021
|
+
/*! @ingroup XXH64_family */
|
|
3022
|
+
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
|
|
2566
3023
|
{
|
|
2567
3024
|
return XXH_readBE64(src);
|
|
2568
3025
|
}
|
|
@@ -2575,7 +3032,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
|
2575
3032
|
************************************************************************ */
|
|
2576
3033
|
/*!
|
|
2577
3034
|
* @}
|
|
2578
|
-
* @defgroup
|
|
3035
|
+
* @defgroup XXH3_impl XXH3 implementation
|
|
2579
3036
|
* @ingroup impl
|
|
2580
3037
|
* @{
|
|
2581
3038
|
*/
|
|
@@ -2601,17 +3058,23 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
|
|
|
2601
3058
|
# define XXH_unlikely(x) (x)
|
|
2602
3059
|
#endif
|
|
2603
3060
|
|
|
2604
|
-
#if defined(__GNUC__)
|
|
2605
|
-
# if defined(
|
|
2606
|
-
# include <
|
|
2607
|
-
# elif defined(
|
|
2608
|
-
|
|
2609
|
-
|
|
3061
|
+
#if defined(__GNUC__) || defined(__clang__)
|
|
3062
|
+
# if defined(__ARM_FEATURE_SVE)
|
|
3063
|
+
# include <arm_sve.h>
|
|
3064
|
+
# elif defined(__ARM_NEON__) || defined(__ARM_NEON) \
|
|
3065
|
+
|| defined(__aarch64__) || defined(_M_ARM) \
|
|
3066
|
+
|| defined(_M_ARM64) || defined(_M_ARM64EC)
|
|
2610
3067
|
# define inline __inline__ /* circumvent a clang bug */
|
|
2611
3068
|
# include <arm_neon.h>
|
|
2612
3069
|
# undef inline
|
|
3070
|
+
# elif defined(__AVX2__)
|
|
3071
|
+
# include <immintrin.h>
|
|
3072
|
+
# elif defined(__SSE2__)
|
|
3073
|
+
# include <emmintrin.h>
|
|
2613
3074
|
# endif
|
|
2614
|
-
#
|
|
3075
|
+
#endif
|
|
3076
|
+
|
|
3077
|
+
#if defined(_MSC_VER)
|
|
2615
3078
|
# include <intrin.h>
|
|
2616
3079
|
#endif
|
|
2617
3080
|
|
|
@@ -2725,12 +3188,13 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2725
3188
|
XXH_AVX512 = 3, /*!< AVX512 for Skylake and Icelake */
|
|
2726
3189
|
XXH_NEON = 4, /*!< NEON for most ARMv7-A and all AArch64 */
|
|
2727
3190
|
XXH_VSX = 5, /*!< VSX and ZVector for POWER8/z13 (64-bit) */
|
|
3191
|
+
XXH_SVE = 6, /*!< SVE for some ARMv8-A and ARMv9-A */
|
|
2728
3192
|
};
|
|
2729
3193
|
/*!
|
|
2730
3194
|
* @ingroup tuning
|
|
2731
3195
|
* @brief Selects the minimum alignment for XXH3's accumulators.
|
|
2732
3196
|
*
|
|
2733
|
-
* When using SIMD, this should match the alignment
|
|
3197
|
+
* When using SIMD, this should match the alignment required for said vector
|
|
2734
3198
|
* type, so, for example, 32 for AVX2.
|
|
2735
3199
|
*
|
|
2736
3200
|
* Default: Auto detected.
|
|
@@ -2746,20 +3210,26 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2746
3210
|
# define XXH_AVX512 3
|
|
2747
3211
|
# define XXH_NEON 4
|
|
2748
3212
|
# define XXH_VSX 5
|
|
3213
|
+
# define XXH_SVE 6
|
|
2749
3214
|
#endif
|
|
2750
3215
|
|
|
2751
3216
|
#ifndef XXH_VECTOR /* can be defined on command line */
|
|
2752
|
-
# if defined(
|
|
3217
|
+
# if defined(__ARM_FEATURE_SVE)
|
|
3218
|
+
# define XXH_VECTOR XXH_SVE
|
|
3219
|
+
# elif ( \
|
|
3220
|
+
defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
|
|
3221
|
+
|| defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
|
|
3222
|
+
) && ( \
|
|
3223
|
+
defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
|
|
3224
|
+
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
|
|
3225
|
+
)
|
|
3226
|
+
# define XXH_VECTOR XXH_NEON
|
|
3227
|
+
# elif defined(__AVX512F__)
|
|
2753
3228
|
# define XXH_VECTOR XXH_AVX512
|
|
2754
3229
|
# elif defined(__AVX2__)
|
|
2755
3230
|
# define XXH_VECTOR XXH_AVX2
|
|
2756
3231
|
# elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
|
|
2757
3232
|
# define XXH_VECTOR XXH_SSE2
|
|
2758
|
-
# elif defined(__GNUC__) /* msvc support maybe later */ \
|
|
2759
|
-
&& (defined(__ARM_NEON__) || defined(__ARM_NEON)) \
|
|
2760
|
-
&& (defined(__LITTLE_ENDIAN__) /* We only support little endian NEON */ \
|
|
2761
|
-
|| (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__))
|
|
2762
|
-
# define XXH_VECTOR XXH_NEON
|
|
2763
3233
|
# elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
|
|
2764
3234
|
|| (defined(__s390x__) && defined(__VEC__)) \
|
|
2765
3235
|
&& defined(__GNUC__) /* TODO: IBM XL */
|
|
@@ -2769,6 +3239,17 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2769
3239
|
# endif
|
|
2770
3240
|
#endif
|
|
2771
3241
|
|
|
3242
|
+
/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
|
|
3243
|
+
#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
|
|
3244
|
+
# ifdef _MSC_VER
|
|
3245
|
+
# pragma warning(once : 4606)
|
|
3246
|
+
# else
|
|
3247
|
+
# warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
|
|
3248
|
+
# endif
|
|
3249
|
+
# undef XXH_VECTOR
|
|
3250
|
+
# define XXH_VECTOR XXH_SCALAR
|
|
3251
|
+
#endif
|
|
3252
|
+
|
|
2772
3253
|
/*
|
|
2773
3254
|
* Controls the alignment of the accumulator,
|
|
2774
3255
|
* for compatibility with aligned vector loads, which are usually faster.
|
|
@@ -2788,12 +3269,16 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2788
3269
|
# define XXH_ACC_ALIGN 16
|
|
2789
3270
|
# elif XXH_VECTOR == XXH_AVX512 /* avx512 */
|
|
2790
3271
|
# define XXH_ACC_ALIGN 64
|
|
3272
|
+
# elif XXH_VECTOR == XXH_SVE /* sve */
|
|
3273
|
+
# define XXH_ACC_ALIGN 64
|
|
2791
3274
|
# endif
|
|
2792
3275
|
#endif
|
|
2793
3276
|
|
|
2794
3277
|
#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
|
|
2795
3278
|
|| XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
|
|
2796
3279
|
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
|
|
3280
|
+
#elif XXH_VECTOR == XXH_SVE
|
|
3281
|
+
# define XXH_SEC_ALIGN XXH_ACC_ALIGN
|
|
2797
3282
|
#else
|
|
2798
3283
|
# define XXH_SEC_ALIGN 8
|
|
2799
3284
|
#endif
|
|
@@ -2821,7 +3306,7 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2821
3306
|
*/
|
|
2822
3307
|
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
|
|
2823
3308
|
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
|
|
2824
|
-
&& defined(__OPTIMIZE__) &&
|
|
3309
|
+
&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
|
|
2825
3310
|
# pragma GCC push_options
|
|
2826
3311
|
# pragma GCC optimize("-O2")
|
|
2827
3312
|
#endif
|
|
@@ -2909,8 +3394,8 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2909
3394
|
* }
|
|
2910
3395
|
*/
|
|
2911
3396
|
# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
|
|
2912
|
-
&& defined(__GNUC__) \
|
|
2913
|
-
&&
|
|
3397
|
+
&& (defined(__GNUC__) || defined(__clang__)) \
|
|
3398
|
+
&& (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
|
|
2914
3399
|
# define XXH_SPLIT_IN_PLACE(in, outLo, outHi) \
|
|
2915
3400
|
do { \
|
|
2916
3401
|
/* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
|
|
@@ -2927,6 +3412,78 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2927
3412
|
(outHi) = vshrn_n_u64 ((in), 32); \
|
|
2928
3413
|
} while (0)
|
|
2929
3414
|
# endif
|
|
3415
|
+
|
|
3416
|
+
/*!
|
|
3417
|
+
* @internal
|
|
3418
|
+
* @brief `vld1q_u64` but faster and alignment-safe.
|
|
3419
|
+
*
|
|
3420
|
+
* On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
|
|
3421
|
+
* *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
|
|
3422
|
+
*
|
|
3423
|
+
* GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
|
|
3424
|
+
* prohibits load-store optimizations. Therefore, a direct dereference is used.
|
|
3425
|
+
*
|
|
3426
|
+
* Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
|
|
3427
|
+
* unaligned load.
|
|
3428
|
+
*/
|
|
3429
|
+
#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
|
|
3430
|
+
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
|
|
3431
|
+
{
|
|
3432
|
+
return *(uint64x2_t const*)ptr;
|
|
3433
|
+
}
|
|
3434
|
+
#else
|
|
3435
|
+
XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
|
|
3436
|
+
{
|
|
3437
|
+
return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
|
|
3438
|
+
}
|
|
3439
|
+
#endif
|
|
3440
|
+
/*!
|
|
3441
|
+
* @ingroup tuning
|
|
3442
|
+
* @brief Controls the NEON to scalar ratio for XXH3
|
|
3443
|
+
*
|
|
3444
|
+
* On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
|
|
3445
|
+
* 2 lanes on scalar by default (except on Apple platforms, as Apple CPUs benefit
|
|
3446
|
+
* from only using NEON).
|
|
3447
|
+
*
|
|
3448
|
+
* This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
|
|
3449
|
+
* emulated 64-bit arithmetic is too slow.
|
|
3450
|
+
*
|
|
3451
|
+
* Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
|
|
3452
|
+
*
|
|
3453
|
+
* For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
|
|
3454
|
+
* have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
|
|
3455
|
+
* you are only using 2/3 of the CPU bandwidth.
|
|
3456
|
+
*
|
|
3457
|
+
* This is even more noticeable on the more advanced cores like the A76 which
|
|
3458
|
+
* can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
|
|
3459
|
+
*
|
|
3460
|
+
* Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
|
|
3461
|
+
* remaining lanes will use scalar instructions. This improves the bandwidth
|
|
3462
|
+
* and also gives the integer pipelines something to do besides twiddling loop
|
|
3463
|
+
* counters and pointers.
|
|
3464
|
+
*
|
|
3465
|
+
* This change benefits CPUs with large micro-op buffers without negatively affecting
|
|
3466
|
+
* most other CPUs:
|
|
3467
|
+
*
|
|
3468
|
+
* | Chipset | Dispatch type | NEON only | 6:2 hybrid | Diff. |
|
|
3469
|
+
* |:----------------------|:--------------------|----------:|-----------:|------:|
|
|
3470
|
+
* | Snapdragon 730 (A76) | 2 NEON/8 micro-ops | 8.8 GB/s | 10.1 GB/s | ~16% |
|
|
3471
|
+
* | Snapdragon 835 (A73) | 2 NEON/3 micro-ops | 5.1 GB/s | 5.3 GB/s | ~5% |
|
|
3472
|
+
* | Marvell PXA1928 (A53) | In-order dual-issue | 1.9 GB/s | 1.9 GB/s | 0% |
|
|
3473
|
+
* | Apple M1 | 4 NEON/8 micro-ops | 37.3 GB/s | 36.1 GB/s | ~-3% |
|
|
3474
|
+
*
|
|
3475
|
+
* It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
|
|
3476
|
+
*
|
|
3477
|
+
* @see XXH3_accumulate_512_neon()
|
|
3478
|
+
*/
|
|
3479
|
+
# ifndef XXH3_NEON_LANES
|
|
3480
|
+
# if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
|
|
3481
|
+
&& !defined(__APPLE__) && XXH_SIZE_OPT <= 0
|
|
3482
|
+
# define XXH3_NEON_LANES 6
|
|
3483
|
+
# else
|
|
3484
|
+
# define XXH3_NEON_LANES XXH_ACC_NB
|
|
3485
|
+
# endif
|
|
3486
|
+
# endif
|
|
2930
3487
|
#endif /* XXH_VECTOR == XXH_NEON */
|
|
2931
3488
|
|
|
2932
3489
|
/*
|
|
@@ -2938,23 +3495,33 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
|
|
|
2938
3495
|
* inconsistent intrinsics, spotty coverage, and multiple endiannesses.
|
|
2939
3496
|
*/
|
|
2940
3497
|
#if XXH_VECTOR == XXH_VSX
|
|
3498
|
+
/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
|
|
3499
|
+
* and `pixel`. This is a problem for obvious reasons.
|
|
3500
|
+
*
|
|
3501
|
+
* These keywords are unnecessary; the spec literally says they are
|
|
3502
|
+
* equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
|
|
3503
|
+
* after including the header.
|
|
3504
|
+
*
|
|
3505
|
+
* We use pragma push_macro/pop_macro to keep the namespace clean. */
|
|
3506
|
+
# pragma push_macro("bool")
|
|
3507
|
+
# pragma push_macro("vector")
|
|
3508
|
+
# pragma push_macro("pixel")
|
|
3509
|
+
/* silence potential macro redefined warnings */
|
|
3510
|
+
# undef bool
|
|
3511
|
+
# undef vector
|
|
3512
|
+
# undef pixel
|
|
3513
|
+
|
|
2941
3514
|
# if defined(__s390x__)
|
|
2942
3515
|
# include <s390intrin.h>
|
|
2943
3516
|
# else
|
|
2944
|
-
/* gcc's altivec.h can have the unwanted consequence to unconditionally
|
|
2945
|
-
* #define bool, vector, and pixel keywords,
|
|
2946
|
-
* with bad consequences for programs already using these keywords for other purposes.
|
|
2947
|
-
* The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
|
|
2948
|
-
* __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
|
|
2949
|
-
* but it seems that, in some cases, it isn't.
|
|
2950
|
-
* Force the build macro to be defined, so that keywords are not altered.
|
|
2951
|
-
*/
|
|
2952
|
-
# if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
|
|
2953
|
-
# define __APPLE_ALTIVEC__
|
|
2954
|
-
# endif
|
|
2955
3517
|
# include <altivec.h>
|
|
2956
3518
|
# endif
|
|
2957
3519
|
|
|
3520
|
+
/* Restore the original macro values, if applicable. */
|
|
3521
|
+
# pragma pop_macro("pixel")
|
|
3522
|
+
# pragma pop_macro("vector")
|
|
3523
|
+
# pragma pop_macro("bool")
|
|
3524
|
+
|
|
2958
3525
|
typedef __vector unsigned long long xxh_u64x2;
|
|
2959
3526
|
typedef __vector unsigned char xxh_u8x16;
|
|
2960
3527
|
typedef __vector unsigned xxh_u32x4;
|
|
@@ -2993,7 +3560,7 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
|
|
|
2993
3560
|
XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
|
|
2994
3561
|
{
|
|
2995
3562
|
xxh_u64x2 ret;
|
|
2996
|
-
|
|
3563
|
+
XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
|
|
2997
3564
|
# if XXH_VSX_BE
|
|
2998
3565
|
ret = XXH_vec_revb(ret);
|
|
2999
3566
|
# endif
|
|
@@ -3010,8 +3577,9 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
|
|
|
3010
3577
|
/* s390x is always big endian, no issue on this platform */
|
|
3011
3578
|
# define XXH_vec_mulo vec_mulo
|
|
3012
3579
|
# define XXH_vec_mule vec_mule
|
|
3013
|
-
# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
|
|
3580
|
+
# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
|
|
3014
3581
|
/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
|
|
3582
|
+
/* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */
|
|
3015
3583
|
# define XXH_vec_mulo __builtin_altivec_vmulouw
|
|
3016
3584
|
# define XXH_vec_mule __builtin_altivec_vmuleuw
|
|
3017
3585
|
# else
|
|
@@ -3032,13 +3600,29 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
|
|
|
3032
3600
|
# endif /* XXH_vec_mulo, XXH_vec_mule */
|
|
3033
3601
|
#endif /* XXH_VECTOR == XXH_VSX */
|
|
3034
3602
|
|
|
3603
|
+
#if XXH_VECTOR == XXH_SVE
|
|
3604
|
+
#define ACCRND(acc, offset) \
|
|
3605
|
+
do { \
|
|
3606
|
+
svuint64_t input_vec = svld1_u64(mask, xinput + offset); \
|
|
3607
|
+
svuint64_t secret_vec = svld1_u64(mask, xsecret + offset); \
|
|
3608
|
+
svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec); \
|
|
3609
|
+
svuint64_t swapped = svtbl_u64(input_vec, kSwap); \
|
|
3610
|
+
svuint64_t mixed_lo = svextw_u64_x(mask, mixed); \
|
|
3611
|
+
svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32); \
|
|
3612
|
+
svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
|
|
3613
|
+
acc = svadd_u64_x(mask, acc, mul); \
|
|
3614
|
+
} while (0)
|
|
3615
|
+
#endif /* XXH_VECTOR == XXH_SVE */
|
|
3616
|
+
|
|
3035
3617
|
|
|
3036
3618
|
/* prefetch
|
|
3037
3619
|
* can be disabled, by declaring XXH_NO_PREFETCH build macro */
|
|
3038
3620
|
#if defined(XXH_NO_PREFETCH)
|
|
3039
3621
|
# define XXH_PREFETCH(ptr) (void)(ptr) /* disabled */
|
|
3040
3622
|
#else
|
|
3041
|
-
# if
|
|
3623
|
+
# if XXH_SIZE_OPT >= 1
|
|
3624
|
+
# define XXH_PREFETCH(ptr) (void)(ptr)
|
|
3625
|
+
# elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) /* _mm_prefetch() not defined outside of x86/x64 */
|
|
3042
3626
|
# include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
|
|
3043
3627
|
# define XXH_PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
|
|
3044
3628
|
# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
|
|
@@ -3103,7 +3687,6 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
|
3103
3687
|
return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
|
|
3104
3688
|
}
|
|
3105
3689
|
#elif defined(_MSC_VER) && defined(_M_IX86)
|
|
3106
|
-
# include <intrin.h>
|
|
3107
3690
|
# define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
|
|
3108
3691
|
#else
|
|
3109
3692
|
/*
|
|
@@ -3122,7 +3705,7 @@ XXH_mult32to64(xxh_u64 x, xxh_u64 y)
|
|
|
3122
3705
|
* Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
|
|
3123
3706
|
* version.
|
|
3124
3707
|
*
|
|
3125
|
-
* @param lhs, rhs The 64-bit integers to be multiplied
|
|
3708
|
+
* @param lhs , rhs The 64-bit integers to be multiplied
|
|
3126
3709
|
* @return The 128-bit result represented in an @ref XXH128_hash_t.
|
|
3127
3710
|
*/
|
|
3128
3711
|
static XXH128_hash_t
|
|
@@ -3143,7 +3726,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
|
3143
3726
|
* In that case it is best to use the portable one.
|
|
3144
3727
|
* https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
|
|
3145
3728
|
*/
|
|
3146
|
-
#if defined(__GNUC__) && !defined(__wasm__) \
|
|
3729
|
+
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
|
|
3147
3730
|
&& defined(__SIZEOF_INT128__) \
|
|
3148
3731
|
|| (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
|
|
3149
3732
|
|
|
@@ -3160,7 +3743,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
|
3160
3743
|
*
|
|
3161
3744
|
* This compiles to single operand MUL on x64.
|
|
3162
3745
|
*/
|
|
3163
|
-
#elif defined(_M_X64) || defined(_M_IA64)
|
|
3746
|
+
#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
|
|
3164
3747
|
|
|
3165
3748
|
#ifndef _MSC_VER
|
|
3166
3749
|
# pragma intrinsic(_umul128)
|
|
@@ -3172,6 +3755,21 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
|
3172
3755
|
r128.high64 = product_high;
|
|
3173
3756
|
return r128;
|
|
3174
3757
|
|
|
3758
|
+
/*
|
|
3759
|
+
* MSVC for ARM64's __umulh method.
|
|
3760
|
+
*
|
|
3761
|
+
* This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
|
|
3762
|
+
*/
|
|
3763
|
+
#elif defined(_M_ARM64) || defined(_M_ARM64EC)
|
|
3764
|
+
|
|
3765
|
+
#ifndef _MSC_VER
|
|
3766
|
+
# pragma intrinsic(__umulh)
|
|
3767
|
+
#endif
|
|
3768
|
+
XXH128_hash_t r128;
|
|
3769
|
+
r128.low64 = lhs * rhs;
|
|
3770
|
+
r128.high64 = __umulh(lhs, rhs);
|
|
3771
|
+
return r128;
|
|
3772
|
+
|
|
3175
3773
|
#else
|
|
3176
3774
|
/*
|
|
3177
3775
|
* Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
|
|
@@ -3240,7 +3838,7 @@ XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
|
|
|
3240
3838
|
* The reason for the separate function is to prevent passing too many structs
|
|
3241
3839
|
* around by value. This will hopefully inline the multiply, but we don't force it.
|
|
3242
3840
|
*
|
|
3243
|
-
* @param lhs, rhs The 64-bit integers to multiply
|
|
3841
|
+
* @param lhs , rhs The 64-bit integers to multiply
|
|
3244
3842
|
* @return The low 64 bits of the product XOR'd by the high 64 bits.
|
|
3245
3843
|
* @see XXH_mult64to128()
|
|
3246
3844
|
*/
|
|
@@ -3252,7 +3850,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
|
|
|
3252
3850
|
}
|
|
3253
3851
|
|
|
3254
3852
|
/*! Seems to produce slightly better code on GCC for some reason. */
|
|
3255
|
-
XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
|
|
3853
|
+
XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
|
|
3256
3854
|
{
|
|
3257
3855
|
XXH_ASSERT(0 <= shift && shift < 64);
|
|
3258
3856
|
return v64 ^ (v64 >> shift);
|
|
@@ -3319,7 +3917,7 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
|
|
|
3319
3917
|
*
|
|
3320
3918
|
* This adds an extra layer of strength for custom secrets.
|
|
3321
3919
|
*/
|
|
3322
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
|
3920
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
|
3323
3921
|
XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
3324
3922
|
{
|
|
3325
3923
|
XXH_ASSERT(input != NULL);
|
|
@@ -3341,7 +3939,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
|
|
|
3341
3939
|
}
|
|
3342
3940
|
}
|
|
3343
3941
|
|
|
3344
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
|
3942
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
|
3345
3943
|
XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
3346
3944
|
{
|
|
3347
3945
|
XXH_ASSERT(input != NULL);
|
|
@@ -3357,7 +3955,7 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
|
|
|
3357
3955
|
}
|
|
3358
3956
|
}
|
|
3359
3957
|
|
|
3360
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
|
3958
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
|
3361
3959
|
XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
3362
3960
|
{
|
|
3363
3961
|
XXH_ASSERT(input != NULL);
|
|
@@ -3374,7 +3972,7 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
|
|
|
3374
3972
|
}
|
|
3375
3973
|
}
|
|
3376
3974
|
|
|
3377
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
|
3975
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
|
3378
3976
|
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
3379
3977
|
{
|
|
3380
3978
|
XXH_ASSERT(len <= 16);
|
|
@@ -3444,7 +4042,7 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
|
|
|
3444
4042
|
}
|
|
3445
4043
|
|
|
3446
4044
|
/* For mid range keys, XXH3 uses a Mum-hash variant. */
|
|
3447
|
-
XXH_FORCE_INLINE XXH64_hash_t
|
|
4045
|
+
XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
|
|
3448
4046
|
XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
3449
4047
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
|
3450
4048
|
XXH64_hash_t seed)
|
|
@@ -3452,29 +4050,39 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
3452
4050
|
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
|
|
3453
4051
|
XXH_ASSERT(16 < len && len <= 128);
|
|
3454
4052
|
|
|
3455
|
-
{ xxh_u64 acc = len * XXH_PRIME64_1;
|
|
4053
|
+
{ xxh_u64 acc = len * XXH_PRIME64_1, acc_end;
|
|
4054
|
+
#if XXH_SIZE_OPT >= 1
|
|
4055
|
+
/* Smaller and cleaner, but slightly slower. */
|
|
4056
|
+
unsigned int i = (unsigned int)(len - 1) / 32;
|
|
4057
|
+
do {
|
|
4058
|
+
acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
|
|
4059
|
+
acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
|
|
4060
|
+
} while (i-- != 0);
|
|
4061
|
+
acc_end = 0;
|
|
4062
|
+
#else
|
|
4063
|
+
acc += XXH3_mix16B(input+0, secret+0, seed);
|
|
4064
|
+
acc_end = XXH3_mix16B(input+len-16, secret+16, seed);
|
|
3456
4065
|
if (len > 32) {
|
|
4066
|
+
acc += XXH3_mix16B(input+16, secret+32, seed);
|
|
4067
|
+
acc_end += XXH3_mix16B(input+len-32, secret+48, seed);
|
|
3457
4068
|
if (len > 64) {
|
|
4069
|
+
acc += XXH3_mix16B(input+32, secret+64, seed);
|
|
4070
|
+
acc_end += XXH3_mix16B(input+len-48, secret+80, seed);
|
|
4071
|
+
|
|
3458
4072
|
if (len > 96) {
|
|
3459
4073
|
acc += XXH3_mix16B(input+48, secret+96, seed);
|
|
3460
|
-
|
|
4074
|
+
acc_end += XXH3_mix16B(input+len-64, secret+112, seed);
|
|
3461
4075
|
}
|
|
3462
|
-
acc += XXH3_mix16B(input+32, secret+64, seed);
|
|
3463
|
-
acc += XXH3_mix16B(input+len-48, secret+80, seed);
|
|
3464
4076
|
}
|
|
3465
|
-
acc += XXH3_mix16B(input+16, secret+32, seed);
|
|
3466
|
-
acc += XXH3_mix16B(input+len-32, secret+48, seed);
|
|
3467
4077
|
}
|
|
3468
|
-
|
|
3469
|
-
|
|
3470
|
-
|
|
3471
|
-
return XXH3_avalanche(acc);
|
|
4078
|
+
#endif
|
|
4079
|
+
return XXH3_avalanche(acc + acc_end);
|
|
3472
4080
|
}
|
|
3473
4081
|
}
|
|
3474
4082
|
|
|
3475
4083
|
#define XXH3_MIDSIZE_MAX 240
|
|
3476
4084
|
|
|
3477
|
-
XXH_NO_INLINE XXH64_hash_t
|
|
4085
|
+
XXH_NO_INLINE XXH_PUREF XXH64_hash_t
|
|
3478
4086
|
XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
3479
4087
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
|
3480
4088
|
XXH64_hash_t seed)
|
|
@@ -3486,13 +4094,17 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
3486
4094
|
#define XXH3_MIDSIZE_LASTOFFSET 17
|
|
3487
4095
|
|
|
3488
4096
|
{ xxh_u64 acc = len * XXH_PRIME64_1;
|
|
3489
|
-
|
|
3490
|
-
int
|
|
4097
|
+
xxh_u64 acc_end;
|
|
4098
|
+
unsigned int const nbRounds = (unsigned int)len / 16;
|
|
4099
|
+
unsigned int i;
|
|
4100
|
+
XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
|
|
3491
4101
|
for (i=0; i<8; i++) {
|
|
3492
4102
|
acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
|
|
3493
4103
|
}
|
|
3494
|
-
|
|
4104
|
+
/* last bytes */
|
|
4105
|
+
acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
|
|
3495
4106
|
XXH_ASSERT(nbRounds >= 8);
|
|
4107
|
+
acc = XXH3_avalanche(acc);
|
|
3496
4108
|
#if defined(__clang__) /* Clang */ \
|
|
3497
4109
|
&& (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
|
|
3498
4110
|
&& !defined(XXH_ENABLE_AUTOVECTORIZE) /* Define to disable */
|
|
@@ -3519,11 +4131,13 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
3519
4131
|
#pragma clang loop vectorize(disable)
|
|
3520
4132
|
#endif
|
|
3521
4133
|
for (i=8 ; i < nbRounds; i++) {
|
|
3522
|
-
|
|
4134
|
+
/*
|
|
4135
|
+
* Prevents clang for unrolling the acc loop and interleaving with this one.
|
|
4136
|
+
*/
|
|
4137
|
+
XXH_COMPILER_GUARD(acc);
|
|
4138
|
+
acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
|
|
3523
4139
|
}
|
|
3524
|
-
|
|
3525
|
-
acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
|
|
3526
|
-
return XXH3_avalanche(acc);
|
|
4140
|
+
return XXH3_avalanche(acc + acc_end);
|
|
3527
4141
|
}
|
|
3528
4142
|
}
|
|
3529
4143
|
|
|
@@ -3539,10 +4153,51 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
3539
4153
|
# define ACC_NB XXH_ACC_NB
|
|
3540
4154
|
#endif
|
|
3541
4155
|
|
|
4156
|
+
#ifndef XXH_PREFETCH_DIST
|
|
4157
|
+
# ifdef __clang__
|
|
4158
|
+
# define XXH_PREFETCH_DIST 320
|
|
4159
|
+
# else
|
|
4160
|
+
# if (XXH_VECTOR == XXH_AVX512)
|
|
4161
|
+
# define XXH_PREFETCH_DIST 512
|
|
4162
|
+
# else
|
|
4163
|
+
# define XXH_PREFETCH_DIST 384
|
|
4164
|
+
# endif
|
|
4165
|
+
# endif /* __clang__ */
|
|
4166
|
+
#endif /* XXH_PREFETCH_DIST */
|
|
4167
|
+
|
|
4168
|
+
/*
|
|
4169
|
+
* These macros are to generate an XXH3_accumulate() function.
|
|
4170
|
+
* The two arguments select the name suffix and target attribute.
|
|
4171
|
+
*
|
|
4172
|
+
* The name of this symbol is XXH3_accumulate_<name>() and it calls
|
|
4173
|
+
* XXH3_accumulate_512_<name>().
|
|
4174
|
+
*
|
|
4175
|
+
* It may be useful to hand implement this function if the compiler fails to
|
|
4176
|
+
* optimize the inline function.
|
|
4177
|
+
*/
|
|
4178
|
+
#define XXH3_ACCUMULATE_TEMPLATE(name) \
|
|
4179
|
+
void \
|
|
4180
|
+
XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc, \
|
|
4181
|
+
const xxh_u8* XXH_RESTRICT input, \
|
|
4182
|
+
const xxh_u8* XXH_RESTRICT secret, \
|
|
4183
|
+
size_t nbStripes) \
|
|
4184
|
+
{ \
|
|
4185
|
+
size_t n; \
|
|
4186
|
+
for (n = 0; n < nbStripes; n++ ) { \
|
|
4187
|
+
const xxh_u8* const in = input + n*XXH_STRIPE_LEN; \
|
|
4188
|
+
XXH_PREFETCH(in + XXH_PREFETCH_DIST); \
|
|
4189
|
+
XXH3_accumulate_512_##name( \
|
|
4190
|
+
acc, \
|
|
4191
|
+
in, \
|
|
4192
|
+
secret + n*XXH_SECRET_CONSUME_RATE); \
|
|
4193
|
+
} \
|
|
4194
|
+
}
|
|
4195
|
+
|
|
4196
|
+
|
|
3542
4197
|
XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
|
|
3543
4198
|
{
|
|
3544
4199
|
if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
|
|
3545
|
-
|
|
4200
|
+
XXH_memcpy(dst, &v64, sizeof(v64));
|
|
3546
4201
|
}
|
|
3547
4202
|
|
|
3548
4203
|
/* Several intrinsic functions below are supposed to accept __int64 as argument,
|
|
@@ -3559,6 +4214,7 @@ XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
|
|
|
3559
4214
|
typedef long long xxh_i64;
|
|
3560
4215
|
#endif
|
|
3561
4216
|
|
|
4217
|
+
|
|
3562
4218
|
/*
|
|
3563
4219
|
* XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
|
|
3564
4220
|
*
|
|
@@ -3594,7 +4250,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
|
|
|
3594
4250
|
const void* XXH_RESTRICT input,
|
|
3595
4251
|
const void* XXH_RESTRICT secret)
|
|
3596
4252
|
{
|
|
3597
|
-
|
|
4253
|
+
__m512i* const xacc = (__m512i *) acc;
|
|
3598
4254
|
XXH_ASSERT((((size_t)acc) & 63) == 0);
|
|
3599
4255
|
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
|
|
3600
4256
|
|
|
@@ -3606,7 +4262,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
|
|
|
3606
4262
|
/* data_key = data_vec ^ key_vec; */
|
|
3607
4263
|
__m512i const data_key = _mm512_xor_si512 (data_vec, key_vec);
|
|
3608
4264
|
/* data_key_lo = data_key >> 32; */
|
|
3609
|
-
__m512i const data_key_lo =
|
|
4265
|
+
__m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
|
|
3610
4266
|
/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
|
|
3611
4267
|
__m512i const product = _mm512_mul_epu32 (data_key, data_key_lo);
|
|
3612
4268
|
/* xacc[0] += swap(data_vec); */
|
|
@@ -3616,6 +4272,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
|
|
|
3616
4272
|
*xacc = _mm512_add_epi64(product, sum);
|
|
3617
4273
|
}
|
|
3618
4274
|
}
|
|
4275
|
+
XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
|
|
3619
4276
|
|
|
3620
4277
|
/*
|
|
3621
4278
|
* XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
|
|
@@ -3643,19 +4300,18 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
|
3643
4300
|
{
|
|
3644
4301
|
XXH_ASSERT((((size_t)acc) & 63) == 0);
|
|
3645
4302
|
XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
|
|
3646
|
-
{
|
|
4303
|
+
{ __m512i* const xacc = (__m512i*) acc;
|
|
3647
4304
|
const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
|
|
3648
4305
|
|
|
3649
4306
|
/* xacc[0] ^= (xacc[0] >> 47) */
|
|
3650
4307
|
__m512i const acc_vec = *xacc;
|
|
3651
4308
|
__m512i const shifted = _mm512_srli_epi64 (acc_vec, 47);
|
|
3652
|
-
__m512i const data_vec = _mm512_xor_si512 (acc_vec, shifted);
|
|
3653
4309
|
/* xacc[0] ^= secret; */
|
|
3654
4310
|
__m512i const key_vec = _mm512_loadu_si512 (secret);
|
|
3655
|
-
__m512i const data_key =
|
|
4311
|
+
__m512i const data_key = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
|
|
3656
4312
|
|
|
3657
4313
|
/* xacc[0] *= XXH_PRIME32_1; */
|
|
3658
|
-
__m512i const data_key_hi =
|
|
4314
|
+
__m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
|
|
3659
4315
|
__m512i const prod_lo = _mm512_mul_epu32 (data_key, prime32);
|
|
3660
4316
|
__m512i const prod_hi = _mm512_mul_epu32 (data_key_hi, prime32);
|
|
3661
4317
|
*xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
|
|
@@ -3670,20 +4326,16 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
|
|
|
3670
4326
|
XXH_ASSERT(((size_t)customSecret & 63) == 0);
|
|
3671
4327
|
(void)(&XXH_writeLE64);
|
|
3672
4328
|
{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
|
|
3673
|
-
__m512i const
|
|
4329
|
+
__m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
|
|
4330
|
+
__m512i const seed = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
|
|
3674
4331
|
|
|
3675
|
-
|
|
3676
|
-
|
|
4332
|
+
const __m512i* const src = (const __m512i*) ((const void*) XXH3_kSecret);
|
|
4333
|
+
__m512i* const dest = ( __m512i*) customSecret;
|
|
3677
4334
|
int i;
|
|
4335
|
+
XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
|
|
4336
|
+
XXH_ASSERT(((size_t)dest & 63) == 0);
|
|
3678
4337
|
for (i=0; i < nbRounds; ++i) {
|
|
3679
|
-
|
|
3680
|
-
* this will warn "discards 'const' qualifier". */
|
|
3681
|
-
union {
|
|
3682
|
-
XXH_ALIGN(64) const __m512i* cp;
|
|
3683
|
-
XXH_ALIGN(64) void* p;
|
|
3684
|
-
} remote_const_void;
|
|
3685
|
-
remote_const_void.cp = src + i;
|
|
3686
|
-
dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
|
|
4338
|
+
dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
|
|
3687
4339
|
} }
|
|
3688
4340
|
}
|
|
3689
4341
|
|
|
@@ -3702,7 +4354,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
|
|
3702
4354
|
const void* XXH_RESTRICT secret)
|
|
3703
4355
|
{
|
|
3704
4356
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
|
3705
|
-
{
|
|
4357
|
+
{ __m256i* const xacc = (__m256i *) acc;
|
|
3706
4358
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
|
3707
4359
|
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
|
|
3708
4360
|
const __m256i* const xinput = (const __m256i *) input;
|
|
@@ -3719,7 +4371,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
|
|
3719
4371
|
/* data_key = data_vec ^ key_vec; */
|
|
3720
4372
|
__m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
|
|
3721
4373
|
/* data_key_lo = data_key >> 32; */
|
|
3722
|
-
__m256i const data_key_lo =
|
|
4374
|
+
__m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
|
|
3723
4375
|
/* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
|
|
3724
4376
|
__m256i const product = _mm256_mul_epu32 (data_key, data_key_lo);
|
|
3725
4377
|
/* xacc[i] += swap(data_vec); */
|
|
@@ -3729,12 +4381,13 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
|
|
|
3729
4381
|
xacc[i] = _mm256_add_epi64(product, sum);
|
|
3730
4382
|
} }
|
|
3731
4383
|
}
|
|
4384
|
+
XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
|
|
3732
4385
|
|
|
3733
4386
|
XXH_FORCE_INLINE XXH_TARGET_AVX2 void
|
|
3734
4387
|
XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
3735
4388
|
{
|
|
3736
4389
|
XXH_ASSERT((((size_t)acc) & 31) == 0);
|
|
3737
|
-
{
|
|
4390
|
+
{ __m256i* const xacc = (__m256i*) acc;
|
|
3738
4391
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
|
3739
4392
|
* _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
|
|
3740
4393
|
const __m256i* const xsecret = (const __m256i *) secret;
|
|
@@ -3751,7 +4404,7 @@ XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
|
3751
4404
|
__m256i const data_key = _mm256_xor_si256 (data_vec, key_vec);
|
|
3752
4405
|
|
|
3753
4406
|
/* xacc[i] *= XXH_PRIME32_1; */
|
|
3754
|
-
__m256i const data_key_hi =
|
|
4407
|
+
__m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
|
|
3755
4408
|
__m256i const prod_lo = _mm256_mul_epu32 (data_key, prime32);
|
|
3756
4409
|
__m256i const prod_hi = _mm256_mul_epu32 (data_key_hi, prime32);
|
|
3757
4410
|
xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
|
|
@@ -3768,8 +4421,8 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR
|
|
|
3768
4421
|
XXH_PREFETCH(customSecret);
|
|
3769
4422
|
{ __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
|
|
3770
4423
|
|
|
3771
|
-
|
|
3772
|
-
|
|
4424
|
+
const __m256i* const src = (const __m256i*) ((const void*) XXH3_kSecret);
|
|
4425
|
+
__m256i* dest = ( __m256i*) customSecret;
|
|
3773
4426
|
|
|
3774
4427
|
# if defined(__GNUC__) || defined(__clang__)
|
|
3775
4428
|
/*
|
|
@@ -3779,14 +4432,16 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR
|
|
|
3779
4432
|
*/
|
|
3780
4433
|
XXH_COMPILER_GUARD(dest);
|
|
3781
4434
|
# endif
|
|
4435
|
+
XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
|
|
4436
|
+
XXH_ASSERT(((size_t)dest & 31) == 0);
|
|
3782
4437
|
|
|
3783
4438
|
/* GCC -O2 need unroll loop manually */
|
|
3784
|
-
dest[0] = _mm256_add_epi64(
|
|
3785
|
-
dest[1] = _mm256_add_epi64(
|
|
3786
|
-
dest[2] = _mm256_add_epi64(
|
|
3787
|
-
dest[3] = _mm256_add_epi64(
|
|
3788
|
-
dest[4] = _mm256_add_epi64(
|
|
3789
|
-
dest[5] = _mm256_add_epi64(
|
|
4439
|
+
dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
|
|
4440
|
+
dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
|
|
4441
|
+
dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
|
|
4442
|
+
dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
|
|
4443
|
+
dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
|
|
4444
|
+
dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
|
|
3790
4445
|
}
|
|
3791
4446
|
}
|
|
3792
4447
|
|
|
@@ -3806,7 +4461,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
|
|
|
3806
4461
|
{
|
|
3807
4462
|
/* SSE2 is just a half-scale version of the AVX2 version. */
|
|
3808
4463
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
|
3809
|
-
{
|
|
4464
|
+
{ __m128i* const xacc = (__m128i *) acc;
|
|
3810
4465
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
|
3811
4466
|
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
|
|
3812
4467
|
const __m128i* const xinput = (const __m128i *) input;
|
|
@@ -3833,12 +4488,13 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
|
|
|
3833
4488
|
xacc[i] = _mm_add_epi64(product, sum);
|
|
3834
4489
|
} }
|
|
3835
4490
|
}
|
|
4491
|
+
XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
|
|
3836
4492
|
|
|
3837
4493
|
XXH_FORCE_INLINE XXH_TARGET_SSE2 void
|
|
3838
4494
|
XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
3839
4495
|
{
|
|
3840
4496
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
|
3841
|
-
{
|
|
4497
|
+
{ __m128i* const xacc = (__m128i*) acc;
|
|
3842
4498
|
/* Unaligned. This is mainly for pointer arithmetic, and because
|
|
3843
4499
|
* _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
|
|
3844
4500
|
const __m128i* const xsecret = (const __m128i *) secret;
|
|
@@ -3870,7 +4526,7 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR
|
|
|
3870
4526
|
{ int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
|
|
3871
4527
|
|
|
3872
4528
|
# if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
|
|
3873
|
-
|
|
4529
|
+
/* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
|
|
3874
4530
|
XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
|
|
3875
4531
|
__m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
|
|
3876
4532
|
# else
|
|
@@ -3878,19 +4534,21 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR
|
|
|
3878
4534
|
# endif
|
|
3879
4535
|
int i;
|
|
3880
4536
|
|
|
3881
|
-
|
|
3882
|
-
|
|
4537
|
+
const void* const src16 = XXH3_kSecret;
|
|
4538
|
+
__m128i* dst16 = (__m128i*) customSecret;
|
|
3883
4539
|
# if defined(__GNUC__) || defined(__clang__)
|
|
3884
4540
|
/*
|
|
3885
4541
|
* On GCC & Clang, marking 'dest' as modified will cause the compiler:
|
|
3886
4542
|
* - do not extract the secret from sse registers in the internal loop
|
|
3887
4543
|
* - use less common registers, and avoid pushing these reg into stack
|
|
3888
4544
|
*/
|
|
3889
|
-
XXH_COMPILER_GUARD(
|
|
4545
|
+
XXH_COMPILER_GUARD(dst16);
|
|
3890
4546
|
# endif
|
|
4547
|
+
XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
|
|
4548
|
+
XXH_ASSERT(((size_t)dst16 & 15) == 0);
|
|
3891
4549
|
|
|
3892
4550
|
for (i=0; i < nbRounds; ++i) {
|
|
3893
|
-
|
|
4551
|
+
dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
|
|
3894
4552
|
} }
|
|
3895
4553
|
}
|
|
3896
4554
|
|
|
@@ -3898,42 +4556,112 @@ XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTR
|
|
|
3898
4556
|
|
|
3899
4557
|
#if (XXH_VECTOR == XXH_NEON)
|
|
3900
4558
|
|
|
4559
|
+
/* forward declarations for the scalar routines */
|
|
4560
|
+
XXH_FORCE_INLINE void
|
|
4561
|
+
XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
|
|
4562
|
+
void const* XXH_RESTRICT secret, size_t lane);
|
|
4563
|
+
|
|
4564
|
+
XXH_FORCE_INLINE void
|
|
4565
|
+
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
|
|
4566
|
+
void const* XXH_RESTRICT secret, size_t lane);
|
|
4567
|
+
|
|
4568
|
+
/*!
|
|
4569
|
+
* @internal
|
|
4570
|
+
* @brief The bulk processing loop for NEON.
|
|
4571
|
+
*
|
|
4572
|
+
* The NEON code path is actually partially scalar when running on AArch64. This
|
|
4573
|
+
* is to optimize the pipelining and can have up to 15% speedup depending on the
|
|
4574
|
+
* CPU, and it also mitigates some GCC codegen issues.
|
|
4575
|
+
*
|
|
4576
|
+
* @see XXH3_NEON_LANES for configuring this and details about this optimization.
|
|
4577
|
+
*/
|
|
3901
4578
|
XXH_FORCE_INLINE void
|
|
3902
4579
|
XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
|
|
3903
4580
|
const void* XXH_RESTRICT input,
|
|
3904
4581
|
const void* XXH_RESTRICT secret)
|
|
3905
4582
|
{
|
|
3906
4583
|
XXH_ASSERT((((size_t)acc) & 15) == 0);
|
|
4584
|
+
XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
|
|
3907
4585
|
{
|
|
3908
|
-
|
|
4586
|
+
uint64x2_t* const xacc = (uint64x2_t *) acc;
|
|
3909
4587
|
/* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
|
|
3910
4588
|
uint8_t const* const xinput = (const uint8_t *) input;
|
|
3911
4589
|
uint8_t const* const xsecret = (const uint8_t *) secret;
|
|
3912
4590
|
|
|
3913
4591
|
size_t i;
|
|
3914
|
-
|
|
4592
|
+
/* AArch64 uses both scalar and neon at the same time */
|
|
4593
|
+
for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
|
|
4594
|
+
XXH3_scalarRound(acc, input, secret, i);
|
|
4595
|
+
}
|
|
4596
|
+
i = 0;
|
|
4597
|
+
for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
|
|
4598
|
+
uint64x2_t acc_vec1 = xacc[i];
|
|
4599
|
+
/* data_vec = xinput[i]; */
|
|
4600
|
+
uint64x2_t data_vec1 = XXH_vld1q_u64(xinput + (i * 16));
|
|
4601
|
+
/* key_vec = xsecret[i]; */
|
|
4602
|
+
uint64x2_t key_vec1 = XXH_vld1q_u64(xsecret + (i * 16));
|
|
4603
|
+
/* acc_vec_2 = swap(data_vec) */
|
|
4604
|
+
uint64x2_t acc_vec_21 = vextq_u64(data_vec1, data_vec1, 1);
|
|
4605
|
+
/* data_key = data_vec ^ key_vec; */
|
|
4606
|
+
uint64x2_t data_key1 = veorq_u64(data_vec1, key_vec1);
|
|
4607
|
+
|
|
4608
|
+
uint64x2_t acc_vec2 = xacc[i+1];
|
|
3915
4609
|
/* data_vec = xinput[i]; */
|
|
3916
|
-
|
|
4610
|
+
uint64x2_t data_vec2 = XXH_vld1q_u64(xinput + ((i+1) * 16));
|
|
3917
4611
|
/* key_vec = xsecret[i]; */
|
|
3918
|
-
|
|
4612
|
+
uint64x2_t key_vec2 = XXH_vld1q_u64(xsecret + ((i+1) * 16));
|
|
4613
|
+
/* acc_vec_2 = swap(data_vec) */
|
|
4614
|
+
uint64x2_t acc_vec_22 = vextq_u64(data_vec2, data_vec2, 1);
|
|
4615
|
+
/* data_key = data_vec ^ key_vec; */
|
|
4616
|
+
uint64x2_t data_key2 = veorq_u64(data_vec2, key_vec2);
|
|
4617
|
+
|
|
4618
|
+
/* data_key_lo = {(data_key1 & 0xFFFFFFFF), (data_key2 & 0xFFFFFFFF)};
|
|
4619
|
+
* data_key_hi = {(data_key1 >> 32), (data_key2 >> 32)};
|
|
4620
|
+
*/
|
|
4621
|
+
uint32x4x2_t zipped = vuzpq_u32(vreinterpretq_u32_u64(data_key1), vreinterpretq_u32_u64(data_key2));
|
|
4622
|
+
uint32x4_t data_key_lo = zipped.val[0];
|
|
4623
|
+
uint32x4_t data_key_hi = zipped.val[1];
|
|
4624
|
+
|
|
4625
|
+
/* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
|
|
4626
|
+
acc_vec_21 = vmlal_u32 (acc_vec_21, vget_low_u32(data_key_lo), vget_low_u32(data_key_hi));
|
|
4627
|
+
XXH_COMPILER_GUARD_W(acc_vec_21);
|
|
4628
|
+
/* xacc[i] += acc_vec_2; */
|
|
4629
|
+
acc_vec1 = vaddq_u64 (acc_vec1, acc_vec_21);
|
|
4630
|
+
xacc[i] = acc_vec1;
|
|
4631
|
+
/* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
|
|
4632
|
+
acc_vec_22 = vmlal_u32 (acc_vec_22, vget_high_u32(data_key_lo), vget_high_u32(data_key_hi));
|
|
4633
|
+
XXH_COMPILER_GUARD_W(acc_vec_22);
|
|
4634
|
+
/* xacc[i+1] += acc_vec_2; */
|
|
4635
|
+
acc_vec2 = vaddq_u64 (acc_vec2, acc_vec_22);
|
|
4636
|
+
xacc[i+1] = acc_vec2;
|
|
4637
|
+
}
|
|
4638
|
+
for (; i < XXH3_NEON_LANES / 2; i++) {
|
|
4639
|
+
uint64x2_t acc_vec = xacc[i];
|
|
4640
|
+
/* data_vec = xinput[i]; */
|
|
4641
|
+
uint64x2_t data_vec = XXH_vld1q_u64(xinput + (i * 16));
|
|
4642
|
+
/* key_vec = xsecret[i]; */
|
|
4643
|
+
uint64x2_t key_vec = XXH_vld1q_u64(xsecret + (i * 16));
|
|
3919
4644
|
uint64x2_t data_key;
|
|
3920
4645
|
uint32x2_t data_key_lo, data_key_hi;
|
|
3921
|
-
/*
|
|
3922
|
-
uint64x2_t
|
|
3923
|
-
uint64x2_t const swapped = vextq_u64(data64, data64, 1);
|
|
3924
|
-
xacc[i] = vaddq_u64 (xacc[i], swapped);
|
|
4646
|
+
/* acc_vec_2 = swap(data_vec) */
|
|
4647
|
+
uint64x2_t acc_vec_2 = vextq_u64(data_vec, data_vec, 1);
|
|
3925
4648
|
/* data_key = data_vec ^ key_vec; */
|
|
3926
|
-
data_key =
|
|
4649
|
+
data_key = veorq_u64(data_vec, key_vec);
|
|
3927
4650
|
/* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
|
|
3928
4651
|
* data_key_hi = (uint32x2_t) (data_key >> 32);
|
|
3929
4652
|
* data_key = UNDEFINED; */
|
|
3930
4653
|
XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
|
|
3931
|
-
/*
|
|
3932
|
-
|
|
3933
|
-
|
|
4654
|
+
/* acc_vec_2 += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
|
|
4655
|
+
acc_vec_2 = vmlal_u32 (acc_vec_2, data_key_lo, data_key_hi);
|
|
4656
|
+
XXH_COMPILER_GUARD_W(acc_vec_2);
|
|
4657
|
+
/* xacc[i] += acc_vec_2; */
|
|
4658
|
+
acc_vec = vaddq_u64 (acc_vec, acc_vec_2);
|
|
4659
|
+
xacc[i] = acc_vec;
|
|
3934
4660
|
}
|
|
4661
|
+
|
|
3935
4662
|
}
|
|
3936
4663
|
}
|
|
4664
|
+
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
|
|
3937
4665
|
|
|
3938
4666
|
XXH_FORCE_INLINE void
|
|
3939
4667
|
XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
@@ -3945,15 +4673,19 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
|
3945
4673
|
uint32x2_t prime = vdup_n_u32 (XXH_PRIME32_1);
|
|
3946
4674
|
|
|
3947
4675
|
size_t i;
|
|
3948
|
-
|
|
4676
|
+
/* AArch64 uses both scalar and neon at the same time */
|
|
4677
|
+
for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
|
|
4678
|
+
XXH3_scalarScrambleRound(acc, secret, i);
|
|
4679
|
+
}
|
|
4680
|
+
for (i=0; i < XXH3_NEON_LANES / 2; i++) {
|
|
3949
4681
|
/* xacc[i] ^= (xacc[i] >> 47); */
|
|
3950
4682
|
uint64x2_t acc_vec = xacc[i];
|
|
3951
|
-
uint64x2_t shifted = vshrq_n_u64
|
|
3952
|
-
uint64x2_t data_vec = veorq_u64
|
|
4683
|
+
uint64x2_t shifted = vshrq_n_u64 (acc_vec, 47);
|
|
4684
|
+
uint64x2_t data_vec = veorq_u64 (acc_vec, shifted);
|
|
3953
4685
|
|
|
3954
4686
|
/* xacc[i] ^= xsecret[i]; */
|
|
3955
|
-
|
|
3956
|
-
uint64x2_t data_key = veorq_u64(data_vec,
|
|
4687
|
+
uint64x2_t key_vec = XXH_vld1q_u64 (xsecret + (i * 16));
|
|
4688
|
+
uint64x2_t data_key = veorq_u64 (data_vec, key_vec);
|
|
3957
4689
|
|
|
3958
4690
|
/* xacc[i] *= XXH_PRIME32_1 */
|
|
3959
4691
|
uint32x2_t data_key_lo, data_key_hi;
|
|
@@ -3981,11 +4713,12 @@ XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
|
3981
4713
|
*/
|
|
3982
4714
|
uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
|
|
3983
4715
|
/* xacc[i] = prod_hi << 32; */
|
|
3984
|
-
|
|
4716
|
+
prod_hi = vshlq_n_u64(prod_hi, 32);
|
|
3985
4717
|
/* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
|
|
3986
|
-
xacc[i] = vmlal_u32(
|
|
4718
|
+
xacc[i] = vmlal_u32(prod_hi, data_key_lo, prime);
|
|
3987
4719
|
}
|
|
3988
|
-
|
|
4720
|
+
}
|
|
4721
|
+
}
|
|
3989
4722
|
}
|
|
3990
4723
|
|
|
3991
4724
|
#endif
|
|
@@ -3997,7 +4730,8 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
|
|
|
3997
4730
|
const void* XXH_RESTRICT input,
|
|
3998
4731
|
const void* XXH_RESTRICT secret)
|
|
3999
4732
|
{
|
|
4000
|
-
|
|
4733
|
+
/* presumed aligned */
|
|
4734
|
+
unsigned int* const xacc = (unsigned int*) acc;
|
|
4001
4735
|
xxh_u64x2 const* const xinput = (xxh_u64x2 const*) input; /* no alignment restriction */
|
|
4002
4736
|
xxh_u64x2 const* const xsecret = (xxh_u64x2 const*) secret; /* no alignment restriction */
|
|
4003
4737
|
xxh_u64x2 const v32 = { 32, 32 };
|
|
@@ -4012,16 +4746,21 @@ XXH3_accumulate_512_vsx( void* XXH_RESTRICT acc,
|
|
|
4012
4746
|
xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
|
|
4013
4747
|
/* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
|
|
4014
4748
|
xxh_u64x2 const product = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
|
|
4015
|
-
xacc[i]
|
|
4749
|
+
/* acc_vec = xacc[i]; */
|
|
4750
|
+
xxh_u64x2 acc_vec = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
|
|
4751
|
+
acc_vec += product;
|
|
4016
4752
|
|
|
4017
4753
|
/* swap high and low halves */
|
|
4018
4754
|
#ifdef __s390x__
|
|
4019
|
-
|
|
4755
|
+
acc_vec += vec_permi(data_vec, data_vec, 2);
|
|
4020
4756
|
#else
|
|
4021
|
-
|
|
4757
|
+
acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
|
|
4022
4758
|
#endif
|
|
4759
|
+
/* xacc[i] = acc_vec; */
|
|
4760
|
+
vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
|
|
4023
4761
|
}
|
|
4024
4762
|
}
|
|
4763
|
+
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
|
|
4025
4764
|
|
|
4026
4765
|
XXH_FORCE_INLINE void
|
|
4027
4766
|
XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
@@ -4055,40 +4794,202 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
|
4055
4794
|
|
|
4056
4795
|
#endif
|
|
4057
4796
|
|
|
4797
|
+
#if (XXH_VECTOR == XXH_SVE)
|
|
4798
|
+
|
|
4799
|
+
XXH_FORCE_INLINE void
|
|
4800
|
+
XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
|
|
4801
|
+
const void* XXH_RESTRICT input,
|
|
4802
|
+
const void* XXH_RESTRICT secret)
|
|
4803
|
+
{
|
|
4804
|
+
uint64_t *xacc = (uint64_t *)acc;
|
|
4805
|
+
const uint64_t *xinput = (const uint64_t *)(const void *)input;
|
|
4806
|
+
const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
|
|
4807
|
+
svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
|
|
4808
|
+
uint64_t element_count = svcntd();
|
|
4809
|
+
if (element_count >= 8) {
|
|
4810
|
+
svbool_t mask = svptrue_pat_b64(SV_VL8);
|
|
4811
|
+
svuint64_t vacc = svld1_u64(mask, xacc);
|
|
4812
|
+
ACCRND(vacc, 0);
|
|
4813
|
+
svst1_u64(mask, xacc, vacc);
|
|
4814
|
+
} else if (element_count == 2) { /* sve128 */
|
|
4815
|
+
svbool_t mask = svptrue_pat_b64(SV_VL2);
|
|
4816
|
+
svuint64_t acc0 = svld1_u64(mask, xacc + 0);
|
|
4817
|
+
svuint64_t acc1 = svld1_u64(mask, xacc + 2);
|
|
4818
|
+
svuint64_t acc2 = svld1_u64(mask, xacc + 4);
|
|
4819
|
+
svuint64_t acc3 = svld1_u64(mask, xacc + 6);
|
|
4820
|
+
ACCRND(acc0, 0);
|
|
4821
|
+
ACCRND(acc1, 2);
|
|
4822
|
+
ACCRND(acc2, 4);
|
|
4823
|
+
ACCRND(acc3, 6);
|
|
4824
|
+
svst1_u64(mask, xacc + 0, acc0);
|
|
4825
|
+
svst1_u64(mask, xacc + 2, acc1);
|
|
4826
|
+
svst1_u64(mask, xacc + 4, acc2);
|
|
4827
|
+
svst1_u64(mask, xacc + 6, acc3);
|
|
4828
|
+
} else {
|
|
4829
|
+
svbool_t mask = svptrue_pat_b64(SV_VL4);
|
|
4830
|
+
svuint64_t acc0 = svld1_u64(mask, xacc + 0);
|
|
4831
|
+
svuint64_t acc1 = svld1_u64(mask, xacc + 4);
|
|
4832
|
+
ACCRND(acc0, 0);
|
|
4833
|
+
ACCRND(acc1, 4);
|
|
4834
|
+
svst1_u64(mask, xacc + 0, acc0);
|
|
4835
|
+
svst1_u64(mask, xacc + 4, acc1);
|
|
4836
|
+
}
|
|
4837
|
+
}
|
|
4838
|
+
|
|
4839
|
+
XXH_FORCE_INLINE void
|
|
4840
|
+
XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
|
|
4841
|
+
const xxh_u8* XXH_RESTRICT input,
|
|
4842
|
+
const xxh_u8* XXH_RESTRICT secret,
|
|
4843
|
+
size_t nbStripes)
|
|
4844
|
+
{
|
|
4845
|
+
if (nbStripes != 0) {
|
|
4846
|
+
uint64_t *xacc = (uint64_t *)acc;
|
|
4847
|
+
const uint64_t *xinput = (const uint64_t *)(const void *)input;
|
|
4848
|
+
const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
|
|
4849
|
+
svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
|
|
4850
|
+
uint64_t element_count = svcntd();
|
|
4851
|
+
if (element_count >= 8) {
|
|
4852
|
+
svbool_t mask = svptrue_pat_b64(SV_VL8);
|
|
4853
|
+
svuint64_t vacc = svld1_u64(mask, xacc + 0);
|
|
4854
|
+
do {
|
|
4855
|
+
/* svprfd(svbool_t, void *, enum svfprop); */
|
|
4856
|
+
svprfd(mask, xinput + 128, SV_PLDL1STRM);
|
|
4857
|
+
ACCRND(vacc, 0);
|
|
4858
|
+
xinput += 8;
|
|
4859
|
+
xsecret += 1;
|
|
4860
|
+
nbStripes--;
|
|
4861
|
+
} while (nbStripes != 0);
|
|
4862
|
+
|
|
4863
|
+
svst1_u64(mask, xacc + 0, vacc);
|
|
4864
|
+
} else if (element_count == 2) { /* sve128 */
|
|
4865
|
+
svbool_t mask = svptrue_pat_b64(SV_VL2);
|
|
4866
|
+
svuint64_t acc0 = svld1_u64(mask, xacc + 0);
|
|
4867
|
+
svuint64_t acc1 = svld1_u64(mask, xacc + 2);
|
|
4868
|
+
svuint64_t acc2 = svld1_u64(mask, xacc + 4);
|
|
4869
|
+
svuint64_t acc3 = svld1_u64(mask, xacc + 6);
|
|
4870
|
+
do {
|
|
4871
|
+
svprfd(mask, xinput + 128, SV_PLDL1STRM);
|
|
4872
|
+
ACCRND(acc0, 0);
|
|
4873
|
+
ACCRND(acc1, 2);
|
|
4874
|
+
ACCRND(acc2, 4);
|
|
4875
|
+
ACCRND(acc3, 6);
|
|
4876
|
+
xinput += 8;
|
|
4877
|
+
xsecret += 1;
|
|
4878
|
+
nbStripes--;
|
|
4879
|
+
} while (nbStripes != 0);
|
|
4880
|
+
|
|
4881
|
+
svst1_u64(mask, xacc + 0, acc0);
|
|
4882
|
+
svst1_u64(mask, xacc + 2, acc1);
|
|
4883
|
+
svst1_u64(mask, xacc + 4, acc2);
|
|
4884
|
+
svst1_u64(mask, xacc + 6, acc3);
|
|
4885
|
+
} else {
|
|
4886
|
+
svbool_t mask = svptrue_pat_b64(SV_VL4);
|
|
4887
|
+
svuint64_t acc0 = svld1_u64(mask, xacc + 0);
|
|
4888
|
+
svuint64_t acc1 = svld1_u64(mask, xacc + 4);
|
|
4889
|
+
do {
|
|
4890
|
+
svprfd(mask, xinput + 128, SV_PLDL1STRM);
|
|
4891
|
+
ACCRND(acc0, 0);
|
|
4892
|
+
ACCRND(acc1, 4);
|
|
4893
|
+
xinput += 8;
|
|
4894
|
+
xsecret += 1;
|
|
4895
|
+
nbStripes--;
|
|
4896
|
+
} while (nbStripes != 0);
|
|
4897
|
+
|
|
4898
|
+
svst1_u64(mask, xacc + 0, acc0);
|
|
4899
|
+
svst1_u64(mask, xacc + 4, acc1);
|
|
4900
|
+
}
|
|
4901
|
+
}
|
|
4902
|
+
}
|
|
4903
|
+
|
|
4904
|
+
#endif
|
|
4905
|
+
|
|
4058
4906
|
/* scalar variants - universal */
|
|
4059
4907
|
|
|
4908
|
+
/*!
|
|
4909
|
+
* @internal
|
|
4910
|
+
* @brief Scalar round for @ref XXH3_accumulate_512_scalar().
|
|
4911
|
+
*
|
|
4912
|
+
* This is extracted to its own function because the NEON path uses a combination
|
|
4913
|
+
* of NEON and scalar.
|
|
4914
|
+
*/
|
|
4915
|
+
XXH_FORCE_INLINE void
|
|
4916
|
+
XXH3_scalarRound(void* XXH_RESTRICT acc,
|
|
4917
|
+
void const* XXH_RESTRICT input,
|
|
4918
|
+
void const* XXH_RESTRICT secret,
|
|
4919
|
+
size_t lane)
|
|
4920
|
+
{
|
|
4921
|
+
xxh_u64* xacc = (xxh_u64*) acc;
|
|
4922
|
+
xxh_u8 const* xinput = (xxh_u8 const*) input;
|
|
4923
|
+
xxh_u8 const* xsecret = (xxh_u8 const*) secret;
|
|
4924
|
+
XXH_ASSERT(lane < XXH_ACC_NB);
|
|
4925
|
+
XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
|
|
4926
|
+
{
|
|
4927
|
+
xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
|
|
4928
|
+
xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
|
|
4929
|
+
xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
|
|
4930
|
+
xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
|
|
4931
|
+
}
|
|
4932
|
+
}
|
|
4933
|
+
|
|
4934
|
+
/*!
|
|
4935
|
+
* @internal
|
|
4936
|
+
* @brief Processes a 64 byte block of data using the scalar path.
|
|
4937
|
+
*/
|
|
4060
4938
|
XXH_FORCE_INLINE void
|
|
4061
4939
|
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
|
|
4062
4940
|
const void* XXH_RESTRICT input,
|
|
4063
4941
|
const void* XXH_RESTRICT secret)
|
|
4064
4942
|
{
|
|
4065
|
-
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
|
|
4066
|
-
const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */
|
|
4067
|
-
const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
|
|
4068
4943
|
size_t i;
|
|
4069
|
-
|
|
4944
|
+
/* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
|
|
4945
|
+
#if defined(__GNUC__) && !defined(__clang__) \
|
|
4946
|
+
&& (defined(__arm__) || defined(__thumb2__)) \
|
|
4947
|
+
&& defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
|
|
4948
|
+
&& XXH_SIZE_OPT <= 0
|
|
4949
|
+
# pragma GCC unroll 8
|
|
4950
|
+
#endif
|
|
4070
4951
|
for (i=0; i < XXH_ACC_NB; i++) {
|
|
4071
|
-
|
|
4072
|
-
xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + i*8);
|
|
4073
|
-
xacc[i ^ 1] += data_val; /* swap adjacent lanes */
|
|
4074
|
-
xacc[i] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
|
|
4952
|
+
XXH3_scalarRound(acc, input, secret, i);
|
|
4075
4953
|
}
|
|
4076
4954
|
}
|
|
4955
|
+
XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
|
|
4077
4956
|
|
|
4957
|
+
/*!
|
|
4958
|
+
* @internal
|
|
4959
|
+
* @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
|
|
4960
|
+
*
|
|
4961
|
+
* This is extracted to its own function because the NEON path uses a combination
|
|
4962
|
+
* of NEON and scalar.
|
|
4963
|
+
*/
|
|
4078
4964
|
XXH_FORCE_INLINE void
|
|
4079
|
-
|
|
4965
|
+
XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
|
|
4966
|
+
void const* XXH_RESTRICT secret,
|
|
4967
|
+
size_t lane)
|
|
4080
4968
|
{
|
|
4081
|
-
|
|
4969
|
+
xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
|
|
4082
4970
|
const xxh_u8* const xsecret = (const xxh_u8*) secret; /* no alignment restriction */
|
|
4083
|
-
size_t i;
|
|
4084
4971
|
XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
|
|
4085
|
-
|
|
4086
|
-
|
|
4087
|
-
xxh_u64
|
|
4972
|
+
XXH_ASSERT(lane < XXH_ACC_NB);
|
|
4973
|
+
{
|
|
4974
|
+
xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
|
|
4975
|
+
xxh_u64 acc64 = xacc[lane];
|
|
4088
4976
|
acc64 = XXH_xorshift64(acc64, 47);
|
|
4089
4977
|
acc64 ^= key64;
|
|
4090
4978
|
acc64 *= XXH_PRIME32_1;
|
|
4091
|
-
xacc[
|
|
4979
|
+
xacc[lane] = acc64;
|
|
4980
|
+
}
|
|
4981
|
+
}
|
|
4982
|
+
|
|
4983
|
+
/*!
|
|
4984
|
+
* @internal
|
|
4985
|
+
* @brief Scrambles the accumulators after a large chunk has been read
|
|
4986
|
+
*/
|
|
4987
|
+
XXH_FORCE_INLINE void
|
|
4988
|
+
XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
|
|
4989
|
+
{
|
|
4990
|
+
size_t i;
|
|
4991
|
+
for (i=0; i < XXH_ACC_NB; i++) {
|
|
4992
|
+
XXH3_scalarScrambleRound(acc, secret, i);
|
|
4092
4993
|
}
|
|
4093
4994
|
}
|
|
4094
4995
|
|
|
@@ -4110,8 +5011,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
|
|
|
4110
5011
|
* placed sequentially, in order, at the top of the unrolled loop.
|
|
4111
5012
|
*
|
|
4112
5013
|
* While MOVK is great for generating constants (2 cycles for a 64-bit
|
|
4113
|
-
* constant compared to 4 cycles for LDR),
|
|
4114
|
-
*
|
|
5014
|
+
* constant compared to 4 cycles for LDR), it fights for bandwidth with
|
|
5015
|
+
* the arithmetic instructions.
|
|
5016
|
+
*
|
|
4115
5017
|
* I L S
|
|
4116
5018
|
* MOVK
|
|
4117
5019
|
* MOVK
|
|
@@ -4128,6 +5030,9 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
|
|
|
4128
5030
|
* ADD LDR
|
|
4129
5031
|
* SUB STR
|
|
4130
5032
|
* STR
|
|
5033
|
+
*
|
|
5034
|
+
* See XXH3_NEON_LANES for details on the pipeline.
|
|
5035
|
+
*
|
|
4131
5036
|
* XXH3_64bits_withSeed, len == 256, Snapdragon 835
|
|
4132
5037
|
* without hack: 2654.4 MB/s
|
|
4133
5038
|
* with hack: 3202.9 MB/s
|
|
@@ -4157,7 +5062,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
|
|
|
4157
5062
|
}
|
|
4158
5063
|
|
|
4159
5064
|
|
|
4160
|
-
typedef void (*
|
|
5065
|
+
typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
|
|
4161
5066
|
typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
|
|
4162
5067
|
typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
|
|
4163
5068
|
|
|
@@ -4165,82 +5070,63 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
|
|
|
4165
5070
|
#if (XXH_VECTOR == XXH_AVX512)
|
|
4166
5071
|
|
|
4167
5072
|
#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
|
|
5073
|
+
#define XXH3_accumulate XXH3_accumulate_avx512
|
|
4168
5074
|
#define XXH3_scrambleAcc XXH3_scrambleAcc_avx512
|
|
4169
5075
|
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
|
|
4170
5076
|
|
|
4171
5077
|
#elif (XXH_VECTOR == XXH_AVX2)
|
|
4172
5078
|
|
|
4173
5079
|
#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
|
|
5080
|
+
#define XXH3_accumulate XXH3_accumulate_avx2
|
|
4174
5081
|
#define XXH3_scrambleAcc XXH3_scrambleAcc_avx2
|
|
4175
5082
|
#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
|
|
4176
5083
|
|
|
4177
5084
|
#elif (XXH_VECTOR == XXH_SSE2)
|
|
4178
5085
|
|
|
4179
5086
|
#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
|
|
5087
|
+
#define XXH3_accumulate XXH3_accumulate_sse2
|
|
4180
5088
|
#define XXH3_scrambleAcc XXH3_scrambleAcc_sse2
|
|
4181
5089
|
#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
|
|
4182
5090
|
|
|
4183
5091
|
#elif (XXH_VECTOR == XXH_NEON)
|
|
4184
5092
|
|
|
4185
5093
|
#define XXH3_accumulate_512 XXH3_accumulate_512_neon
|
|
5094
|
+
#define XXH3_accumulate XXH3_accumulate_neon
|
|
4186
5095
|
#define XXH3_scrambleAcc XXH3_scrambleAcc_neon
|
|
4187
5096
|
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
|
|
4188
5097
|
|
|
4189
5098
|
#elif (XXH_VECTOR == XXH_VSX)
|
|
4190
5099
|
|
|
4191
5100
|
#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
|
|
5101
|
+
#define XXH3_accumulate XXH3_accumulate_vsx
|
|
4192
5102
|
#define XXH3_scrambleAcc XXH3_scrambleAcc_vsx
|
|
4193
5103
|
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
|
|
4194
5104
|
|
|
5105
|
+
#elif (XXH_VECTOR == XXH_SVE)
|
|
5106
|
+
#define XXH3_accumulate_512 XXH3_accumulate_512_sve
|
|
5107
|
+
#define XXH3_accumulate XXH3_accumulate_sve
|
|
5108
|
+
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
|
|
5109
|
+
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
|
|
5110
|
+
|
|
4195
5111
|
#else /* scalar */
|
|
4196
5112
|
|
|
4197
5113
|
#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
|
|
5114
|
+
#define XXH3_accumulate XXH3_accumulate_scalar
|
|
4198
5115
|
#define XXH3_scrambleAcc XXH3_scrambleAcc_scalar
|
|
4199
5116
|
#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
|
|
4200
5117
|
|
|
4201
5118
|
#endif
|
|
4202
5119
|
|
|
4203
|
-
|
|
4204
|
-
|
|
4205
|
-
#
|
|
4206
|
-
#
|
|
4207
|
-
# define XXH_PREFETCH_DIST 320
|
|
4208
|
-
# else
|
|
4209
|
-
# if (XXH_VECTOR == XXH_AVX512)
|
|
4210
|
-
# define XXH_PREFETCH_DIST 512
|
|
4211
|
-
# else
|
|
4212
|
-
# define XXH_PREFETCH_DIST 384
|
|
4213
|
-
# endif
|
|
4214
|
-
# endif /* __clang__ */
|
|
4215
|
-
#endif /* XXH_PREFETCH_DIST */
|
|
4216
|
-
|
|
4217
|
-
/*
|
|
4218
|
-
* XXH3_accumulate()
|
|
4219
|
-
* Loops over XXH3_accumulate_512().
|
|
4220
|
-
* Assumption: nbStripes will not overflow the secret size
|
|
4221
|
-
*/
|
|
4222
|
-
XXH_FORCE_INLINE void
|
|
4223
|
-
XXH3_accumulate( xxh_u64* XXH_RESTRICT acc,
|
|
4224
|
-
const xxh_u8* XXH_RESTRICT input,
|
|
4225
|
-
const xxh_u8* XXH_RESTRICT secret,
|
|
4226
|
-
size_t nbStripes,
|
|
4227
|
-
XXH3_f_accumulate_512 f_acc512)
|
|
4228
|
-
{
|
|
4229
|
-
size_t n;
|
|
4230
|
-
for (n = 0; n < nbStripes; n++ ) {
|
|
4231
|
-
const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
|
|
4232
|
-
XXH_PREFETCH(in + XXH_PREFETCH_DIST);
|
|
4233
|
-
f_acc512(acc,
|
|
4234
|
-
in,
|
|
4235
|
-
secret + n*XXH_SECRET_CONSUME_RATE);
|
|
4236
|
-
}
|
|
4237
|
-
}
|
|
5120
|
+
#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
|
|
5121
|
+
# undef XXH3_initCustomSecret
|
|
5122
|
+
# define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
|
|
5123
|
+
#endif
|
|
4238
5124
|
|
|
4239
5125
|
XXH_FORCE_INLINE void
|
|
4240
5126
|
XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
|
|
4241
5127
|
const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
4242
5128
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
|
4243
|
-
|
|
5129
|
+
XXH3_f_accumulate f_acc,
|
|
4244
5130
|
XXH3_f_scrambleAcc f_scramble)
|
|
4245
5131
|
{
|
|
4246
5132
|
size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
|
|
@@ -4252,7 +5138,7 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
|
|
|
4252
5138
|
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
|
|
4253
5139
|
|
|
4254
5140
|
for (n = 0; n < nb_blocks; n++) {
|
|
4255
|
-
|
|
5141
|
+
f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
|
|
4256
5142
|
f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
|
|
4257
5143
|
}
|
|
4258
5144
|
|
|
@@ -4260,12 +5146,12 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
|
|
|
4260
5146
|
XXH_ASSERT(len > XXH_STRIPE_LEN);
|
|
4261
5147
|
{ size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
|
|
4262
5148
|
XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
|
|
4263
|
-
|
|
5149
|
+
f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
|
|
4264
5150
|
|
|
4265
5151
|
/* last stripe */
|
|
4266
5152
|
{ const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
|
|
4267
5153
|
#define XXH_SECRET_LASTACC_START 7 /* not aligned on 8, last secret is different from acc & scrambler */
|
|
4268
|
-
|
|
5154
|
+
XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
|
|
4269
5155
|
} }
|
|
4270
5156
|
}
|
|
4271
5157
|
|
|
@@ -4310,12 +5196,12 @@ XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secre
|
|
|
4310
5196
|
XXH_FORCE_INLINE XXH64_hash_t
|
|
4311
5197
|
XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
|
|
4312
5198
|
const void* XXH_RESTRICT secret, size_t secretSize,
|
|
4313
|
-
|
|
5199
|
+
XXH3_f_accumulate f_acc,
|
|
4314
5200
|
XXH3_f_scrambleAcc f_scramble)
|
|
4315
5201
|
{
|
|
4316
5202
|
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
|
|
4317
5203
|
|
|
4318
|
-
XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize,
|
|
5204
|
+
XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
|
|
4319
5205
|
|
|
4320
5206
|
/* converge into final hash */
|
|
4321
5207
|
XXH_STATIC_ASSERT(sizeof(acc) == 64);
|
|
@@ -4326,29 +5212,30 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
|
|
|
4326
5212
|
}
|
|
4327
5213
|
|
|
4328
5214
|
/*
|
|
4329
|
-
* It's important for performance
|
|
5215
|
+
* It's important for performance to transmit secret's size (when it's static)
|
|
5216
|
+
* so that the compiler can properly optimize the vectorized loop.
|
|
5217
|
+
* This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
|
|
4330
5218
|
*/
|
|
4331
|
-
|
|
5219
|
+
XXH_FORCE_INLINE XXH64_hash_t
|
|
4332
5220
|
XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
|
|
4333
5221
|
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
|
|
4334
5222
|
{
|
|
4335
5223
|
(void)seed64;
|
|
4336
|
-
return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
|
|
5224
|
+
return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
|
|
4337
5225
|
}
|
|
4338
5226
|
|
|
4339
5227
|
/*
|
|
4340
|
-
* It's
|
|
4341
|
-
*
|
|
4342
|
-
*
|
|
4343
|
-
*
|
|
4344
|
-
* and uses this opportunity to streamline the generated code for better performance.
|
|
5228
|
+
* It's preferable for performance that XXH3_hashLong is not inlined,
|
|
5229
|
+
* as it results in a smaller function for small data, which is easier on the instruction cache.
|
|
5230
|
+
* Note that inside this no_inline function, we do inline the internal loop,
|
|
5231
|
+
* and provide a statically defined secret size to allow optimization of vector loop.
|
|
4345
5232
|
*/
|
|
4346
|
-
XXH_NO_INLINE XXH64_hash_t
|
|
5233
|
+
XXH_NO_INLINE XXH_PUREF XXH64_hash_t
|
|
4347
5234
|
XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
|
|
4348
5235
|
XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
|
|
4349
5236
|
{
|
|
4350
5237
|
(void)seed64; (void)secret; (void)secretLen;
|
|
4351
|
-
return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
|
|
5238
|
+
return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
|
|
4352
5239
|
}
|
|
4353
5240
|
|
|
4354
5241
|
/*
|
|
@@ -4365,18 +5252,20 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
|
|
|
4365
5252
|
XXH_FORCE_INLINE XXH64_hash_t
|
|
4366
5253
|
XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
|
|
4367
5254
|
XXH64_hash_t seed,
|
|
4368
|
-
|
|
5255
|
+
XXH3_f_accumulate f_acc,
|
|
4369
5256
|
XXH3_f_scrambleAcc f_scramble,
|
|
4370
5257
|
XXH3_f_initCustomSecret f_initSec)
|
|
4371
5258
|
{
|
|
5259
|
+
#if XXH_SIZE_OPT <= 0
|
|
4372
5260
|
if (seed == 0)
|
|
4373
5261
|
return XXH3_hashLong_64b_internal(input, len,
|
|
4374
5262
|
XXH3_kSecret, sizeof(XXH3_kSecret),
|
|
4375
|
-
|
|
5263
|
+
f_acc, f_scramble);
|
|
5264
|
+
#endif
|
|
4376
5265
|
{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
|
|
4377
5266
|
f_initSec(secret, seed);
|
|
4378
5267
|
return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
|
|
4379
|
-
|
|
5268
|
+
f_acc, f_scramble);
|
|
4380
5269
|
}
|
|
4381
5270
|
}
|
|
4382
5271
|
|
|
@@ -4384,12 +5273,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
|
|
|
4384
5273
|
* It's important for performance that XXH3_hashLong is not inlined.
|
|
4385
5274
|
*/
|
|
4386
5275
|
XXH_NO_INLINE XXH64_hash_t
|
|
4387
|
-
XXH3_hashLong_64b_withSeed(const void* input, size_t len,
|
|
4388
|
-
XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
|
|
5276
|
+
XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
|
|
5277
|
+
XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
|
|
4389
5278
|
{
|
|
4390
5279
|
(void)secret; (void)secretLen;
|
|
4391
5280
|
return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
|
|
4392
|
-
|
|
5281
|
+
XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
|
|
4393
5282
|
}
|
|
4394
5283
|
|
|
4395
5284
|
|
|
@@ -4421,29 +5310,37 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
|
|
|
4421
5310
|
|
|
4422
5311
|
/* === Public entry point === */
|
|
4423
5312
|
|
|
4424
|
-
/*! @ingroup
|
|
4425
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t
|
|
5313
|
+
/*! @ingroup XXH3_family */
|
|
5314
|
+
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
|
|
4426
5315
|
{
|
|
4427
|
-
return XXH3_64bits_internal(input,
|
|
5316
|
+
return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
|
|
4428
5317
|
}
|
|
4429
5318
|
|
|
4430
|
-
/*! @ingroup
|
|
5319
|
+
/*! @ingroup XXH3_family */
|
|
4431
5320
|
XXH_PUBLIC_API XXH64_hash_t
|
|
4432
|
-
XXH3_64bits_withSecret(const void* input, size_t
|
|
5321
|
+
XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
|
|
4433
5322
|
{
|
|
4434
|
-
return XXH3_64bits_internal(input,
|
|
5323
|
+
return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
|
|
4435
5324
|
}
|
|
4436
5325
|
|
|
4437
|
-
/*! @ingroup
|
|
5326
|
+
/*! @ingroup XXH3_family */
|
|
4438
5327
|
XXH_PUBLIC_API XXH64_hash_t
|
|
4439
|
-
XXH3_64bits_withSeed(const void* input, size_t
|
|
5328
|
+
XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
|
|
4440
5329
|
{
|
|
4441
|
-
return XXH3_64bits_internal(input,
|
|
5330
|
+
return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
|
|
4442
5331
|
}
|
|
4443
5332
|
|
|
5333
|
+
XXH_PUBLIC_API XXH64_hash_t
|
|
5334
|
+
XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
|
|
5335
|
+
{
|
|
5336
|
+
if (length <= XXH3_MIDSIZE_MAX)
|
|
5337
|
+
return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
|
|
5338
|
+
return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
|
|
5339
|
+
}
|
|
4444
5340
|
|
|
4445
|
-
/* === XXH3 streaming === */
|
|
4446
5341
|
|
|
5342
|
+
/* === XXH3 streaming === */
|
|
5343
|
+
#ifndef XXH_NO_STREAM
|
|
4447
5344
|
/*
|
|
4448
5345
|
* Malloc's a pointer that is always aligned to align.
|
|
4449
5346
|
*
|
|
@@ -4467,7 +5364,7 @@ XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
|
|
|
4467
5364
|
*
|
|
4468
5365
|
* Align must be a power of 2 and 8 <= align <= 128.
|
|
4469
5366
|
*/
|
|
4470
|
-
static void* XXH_alignedMalloc(size_t s, size_t align)
|
|
5367
|
+
static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
|
|
4471
5368
|
{
|
|
4472
5369
|
XXH_ASSERT(align <= 128 && align >= 8); /* range check */
|
|
4473
5370
|
XXH_ASSERT((align & (align-1)) == 0); /* power of 2 */
|
|
@@ -4509,7 +5406,7 @@ static void XXH_alignedFree(void* p)
|
|
|
4509
5406
|
XXH_free(base);
|
|
4510
5407
|
}
|
|
4511
5408
|
}
|
|
4512
|
-
/*! @ingroup
|
|
5409
|
+
/*! @ingroup XXH3_family */
|
|
4513
5410
|
XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
|
|
4514
5411
|
{
|
|
4515
5412
|
XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
|
|
@@ -4518,24 +5415,24 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
|
|
|
4518
5415
|
return state;
|
|
4519
5416
|
}
|
|
4520
5417
|
|
|
4521
|
-
/*! @ingroup
|
|
5418
|
+
/*! @ingroup XXH3_family */
|
|
4522
5419
|
XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
|
|
4523
5420
|
{
|
|
4524
5421
|
XXH_alignedFree(statePtr);
|
|
4525
5422
|
return XXH_OK;
|
|
4526
5423
|
}
|
|
4527
5424
|
|
|
4528
|
-
/*! @ingroup
|
|
5425
|
+
/*! @ingroup XXH3_family */
|
|
4529
5426
|
XXH_PUBLIC_API void
|
|
4530
|
-
XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
|
|
5427
|
+
XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
|
|
4531
5428
|
{
|
|
4532
|
-
|
|
5429
|
+
XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
|
|
4533
5430
|
}
|
|
4534
5431
|
|
|
4535
5432
|
static void
|
|
4536
5433
|
XXH3_reset_internal(XXH3_state_t* statePtr,
|
|
4537
|
-
|
|
4538
|
-
|
|
5434
|
+
XXH64_hash_t seed,
|
|
5435
|
+
const void* secret, size_t secretSize)
|
|
4539
5436
|
{
|
|
4540
5437
|
size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
|
|
4541
5438
|
size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
|
|
@@ -4552,24 +5449,25 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
|
|
|
4552
5449
|
statePtr->acc[6] = XXH_PRIME64_5;
|
|
4553
5450
|
statePtr->acc[7] = XXH_PRIME32_1;
|
|
4554
5451
|
statePtr->seed = seed;
|
|
5452
|
+
statePtr->useSeed = (seed != 0);
|
|
4555
5453
|
statePtr->extSecret = (const unsigned char*)secret;
|
|
4556
5454
|
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
|
|
4557
5455
|
statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
|
|
4558
5456
|
statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
|
|
4559
5457
|
}
|
|
4560
5458
|
|
|
4561
|
-
/*! @ingroup
|
|
5459
|
+
/*! @ingroup XXH3_family */
|
|
4562
5460
|
XXH_PUBLIC_API XXH_errorcode
|
|
4563
|
-
XXH3_64bits_reset(XXH3_state_t* statePtr)
|
|
5461
|
+
XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
|
|
4564
5462
|
{
|
|
4565
5463
|
if (statePtr == NULL) return XXH_ERROR;
|
|
4566
5464
|
XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
|
|
4567
5465
|
return XXH_OK;
|
|
4568
5466
|
}
|
|
4569
5467
|
|
|
4570
|
-
/*! @ingroup
|
|
5468
|
+
/*! @ingroup XXH3_family */
|
|
4571
5469
|
XXH_PUBLIC_API XXH_errorcode
|
|
4572
|
-
XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
|
|
5470
|
+
XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
|
|
4573
5471
|
{
|
|
4574
5472
|
if (statePtr == NULL) return XXH_ERROR;
|
|
4575
5473
|
XXH3_reset_internal(statePtr, 0, secret, secretSize);
|
|
@@ -4578,17 +5476,30 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t
|
|
|
4578
5476
|
return XXH_OK;
|
|
4579
5477
|
}
|
|
4580
5478
|
|
|
4581
|
-
/*! @ingroup
|
|
5479
|
+
/*! @ingroup XXH3_family */
|
|
4582
5480
|
XXH_PUBLIC_API XXH_errorcode
|
|
4583
|
-
XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
|
|
5481
|
+
XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
|
|
4584
5482
|
{
|
|
4585
5483
|
if (statePtr == NULL) return XXH_ERROR;
|
|
4586
5484
|
if (seed==0) return XXH3_64bits_reset(statePtr);
|
|
4587
|
-
if (seed != statePtr->seed)
|
|
5485
|
+
if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
|
|
5486
|
+
XXH3_initCustomSecret(statePtr->customSecret, seed);
|
|
4588
5487
|
XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
|
|
4589
5488
|
return XXH_OK;
|
|
4590
5489
|
}
|
|
4591
5490
|
|
|
5491
|
+
/*! @ingroup XXH3_family */
|
|
5492
|
+
XXH_PUBLIC_API XXH_errorcode
|
|
5493
|
+
XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
|
|
5494
|
+
{
|
|
5495
|
+
if (statePtr == NULL) return XXH_ERROR;
|
|
5496
|
+
if (secret == NULL) return XXH_ERROR;
|
|
5497
|
+
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
|
|
5498
|
+
XXH3_reset_internal(statePtr, seed64, secret, secretSize);
|
|
5499
|
+
statePtr->useSeed = 1; /* always, even if seed64==0 */
|
|
5500
|
+
return XXH_OK;
|
|
5501
|
+
}
|
|
5502
|
+
|
|
4592
5503
|
/* Note : when XXH3_consumeStripes() is invoked,
|
|
4593
5504
|
* there must be a guarantee that at least one more byte must be consumed from input
|
|
4594
5505
|
* so that the function can blindly consume all stripes using the "normal" secret segment */
|
|
@@ -4597,7 +5508,7 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
|
|
|
4597
5508
|
size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
|
|
4598
5509
|
const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
|
|
4599
5510
|
const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
|
|
4600
|
-
|
|
5511
|
+
XXH3_f_accumulate f_acc,
|
|
4601
5512
|
XXH3_f_scrambleAcc f_scramble)
|
|
4602
5513
|
{
|
|
4603
5514
|
XXH_ASSERT(nbStripes <= nbStripesPerBlock); /* can handle max 1 scramble per invocation */
|
|
@@ -4606,45 +5517,58 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
|
|
|
4606
5517
|
/* need a scrambling operation */
|
|
4607
5518
|
size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
|
|
4608
5519
|
size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
|
|
4609
|
-
|
|
5520
|
+
f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock);
|
|
4610
5521
|
f_scramble(acc, secret + secretLimit);
|
|
4611
|
-
|
|
5522
|
+
f_acc(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock);
|
|
4612
5523
|
*nbStripesSoFarPtr = nbStripesAfterBlock;
|
|
4613
5524
|
} else {
|
|
4614
|
-
|
|
5525
|
+
f_acc(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes);
|
|
4615
5526
|
*nbStripesSoFarPtr += nbStripes;
|
|
4616
5527
|
}
|
|
4617
5528
|
}
|
|
4618
5529
|
|
|
5530
|
+
#ifndef XXH3_STREAM_USE_STACK
|
|
5531
|
+
# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
|
|
5532
|
+
# define XXH3_STREAM_USE_STACK 1
|
|
5533
|
+
# endif
|
|
5534
|
+
#endif
|
|
4619
5535
|
/*
|
|
4620
5536
|
* Both XXH3_64bits_update and XXH3_128bits_update use this routine.
|
|
4621
5537
|
*/
|
|
4622
5538
|
XXH_FORCE_INLINE XXH_errorcode
|
|
4623
|
-
XXH3_update(XXH3_state_t* state,
|
|
4624
|
-
const xxh_u8* input, size_t len,
|
|
4625
|
-
|
|
5539
|
+
XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
|
|
5540
|
+
const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
5541
|
+
XXH3_f_accumulate f_acc,
|
|
4626
5542
|
XXH3_f_scrambleAcc f_scramble)
|
|
4627
5543
|
{
|
|
4628
|
-
if (input==NULL)
|
|
4629
|
-
|
|
5544
|
+
if (input==NULL) {
|
|
5545
|
+
XXH_ASSERT(len == 0);
|
|
4630
5546
|
return XXH_OK;
|
|
4631
|
-
|
|
4632
|
-
return XXH_ERROR;
|
|
4633
|
-
#endif
|
|
5547
|
+
}
|
|
4634
5548
|
|
|
5549
|
+
XXH_ASSERT(state != NULL);
|
|
4635
5550
|
{ const xxh_u8* const bEnd = input + len;
|
|
4636
5551
|
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
|
|
4637
|
-
|
|
5552
|
+
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
|
|
5553
|
+
/* For some reason, gcc and MSVC seem to suffer greatly
|
|
5554
|
+
* when operating accumulators directly into state.
|
|
5555
|
+
* Operating into stack space seems to enable proper optimization.
|
|
5556
|
+
* clang, on the other hand, doesn't seem to need this trick */
|
|
5557
|
+
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
|
|
5558
|
+
#else
|
|
5559
|
+
xxh_u64* XXH_RESTRICT const acc = state->acc;
|
|
5560
|
+
#endif
|
|
4638
5561
|
state->totalLen += len;
|
|
4639
5562
|
XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
|
|
4640
5563
|
|
|
4641
|
-
|
|
5564
|
+
/* small input : just fill in tmp buffer */
|
|
5565
|
+
if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
|
|
4642
5566
|
XXH_memcpy(state->buffer + state->bufferedSize, input, len);
|
|
4643
5567
|
state->bufferedSize += (XXH32_hash_t)len;
|
|
4644
5568
|
return XXH_OK;
|
|
4645
5569
|
}
|
|
4646
|
-
/* total input is now > XXH3_INTERNALBUFFER_SIZE */
|
|
4647
5570
|
|
|
5571
|
+
/* total input is now > XXH3_INTERNALBUFFER_SIZE */
|
|
4648
5572
|
#define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
|
|
4649
5573
|
XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0); /* clean multiple */
|
|
4650
5574
|
|
|
@@ -4656,45 +5580,82 @@ XXH3_update(XXH3_state_t* state,
|
|
|
4656
5580
|
size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
|
|
4657
5581
|
XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
|
|
4658
5582
|
input += loadSize;
|
|
4659
|
-
XXH3_consumeStripes(
|
|
5583
|
+
XXH3_consumeStripes(acc,
|
|
4660
5584
|
&state->nbStripesSoFar, state->nbStripesPerBlock,
|
|
4661
5585
|
state->buffer, XXH3_INTERNALBUFFER_STRIPES,
|
|
4662
5586
|
secret, state->secretLimit,
|
|
4663
|
-
|
|
5587
|
+
f_acc, f_scramble);
|
|
4664
5588
|
state->bufferedSize = 0;
|
|
4665
5589
|
}
|
|
4666
5590
|
XXH_ASSERT(input < bEnd);
|
|
4667
5591
|
|
|
4668
|
-
/*
|
|
4669
|
-
if (input
|
|
4670
|
-
|
|
4671
|
-
|
|
4672
|
-
|
|
4673
|
-
|
|
4674
|
-
|
|
4675
|
-
|
|
4676
|
-
|
|
4677
|
-
|
|
4678
|
-
|
|
4679
|
-
|
|
4680
|
-
|
|
5592
|
+
/* large input to consume : ingest per full block */
|
|
5593
|
+
if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
|
|
5594
|
+
size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
|
|
5595
|
+
XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
|
|
5596
|
+
/* join to current block's end */
|
|
5597
|
+
{ size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
|
|
5598
|
+
XXH_ASSERT(nbStripesToEnd <= nbStripes);
|
|
5599
|
+
f_acc(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd);
|
|
5600
|
+
f_scramble(acc, secret + state->secretLimit);
|
|
5601
|
+
state->nbStripesSoFar = 0;
|
|
5602
|
+
input += nbStripesToEnd * XXH_STRIPE_LEN;
|
|
5603
|
+
nbStripes -= nbStripesToEnd;
|
|
5604
|
+
}
|
|
5605
|
+
/* consume per entire blocks */
|
|
5606
|
+
while(nbStripes >= state->nbStripesPerBlock) {
|
|
5607
|
+
f_acc(acc, input, secret, state->nbStripesPerBlock);
|
|
5608
|
+
f_scramble(acc, secret + state->secretLimit);
|
|
5609
|
+
input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
|
|
5610
|
+
nbStripes -= state->nbStripesPerBlock;
|
|
5611
|
+
}
|
|
5612
|
+
/* consume last partial block */
|
|
5613
|
+
f_acc(acc, input, secret, nbStripes);
|
|
5614
|
+
input += nbStripes * XXH_STRIPE_LEN;
|
|
5615
|
+
XXH_ASSERT(input < bEnd); /* at least some bytes left */
|
|
5616
|
+
state->nbStripesSoFar = nbStripes;
|
|
5617
|
+
/* buffer predecessor of last partial stripe */
|
|
5618
|
+
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
|
|
5619
|
+
XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
|
|
5620
|
+
} else {
|
|
5621
|
+
/* content to consume <= block size */
|
|
5622
|
+
/* Consume input by a multiple of internal buffer size */
|
|
5623
|
+
if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
|
|
5624
|
+
const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
|
|
5625
|
+
do {
|
|
5626
|
+
XXH3_consumeStripes(acc,
|
|
5627
|
+
&state->nbStripesSoFar, state->nbStripesPerBlock,
|
|
5628
|
+
input, XXH3_INTERNALBUFFER_STRIPES,
|
|
5629
|
+
secret, state->secretLimit,
|
|
5630
|
+
f_acc, f_scramble);
|
|
5631
|
+
input += XXH3_INTERNALBUFFER_SIZE;
|
|
5632
|
+
} while (input<limit);
|
|
5633
|
+
/* buffer predecessor of last partial stripe */
|
|
5634
|
+
XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
|
|
5635
|
+
}
|
|
4681
5636
|
}
|
|
4682
|
-
XXH_ASSERT(input < bEnd);
|
|
4683
5637
|
|
|
4684
5638
|
/* Some remaining input (always) : buffer it */
|
|
5639
|
+
XXH_ASSERT(input < bEnd);
|
|
5640
|
+
XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
|
|
5641
|
+
XXH_ASSERT(state->bufferedSize == 0);
|
|
4685
5642
|
XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
|
|
4686
5643
|
state->bufferedSize = (XXH32_hash_t)(bEnd-input);
|
|
5644
|
+
#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
|
|
5645
|
+
/* save stack accumulators into state */
|
|
5646
|
+
memcpy(state->acc, acc, sizeof(acc));
|
|
5647
|
+
#endif
|
|
4687
5648
|
}
|
|
4688
5649
|
|
|
4689
5650
|
return XXH_OK;
|
|
4690
5651
|
}
|
|
4691
5652
|
|
|
4692
|
-
/*! @ingroup
|
|
5653
|
+
/*! @ingroup XXH3_family */
|
|
4693
5654
|
XXH_PUBLIC_API XXH_errorcode
|
|
4694
|
-
XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
|
|
5655
|
+
XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
|
|
4695
5656
|
{
|
|
4696
5657
|
return XXH3_update(state, (const xxh_u8*)input, len,
|
|
4697
|
-
|
|
5658
|
+
XXH3_accumulate, XXH3_scrambleAcc);
|
|
4698
5659
|
}
|
|
4699
5660
|
|
|
4700
5661
|
|
|
@@ -4707,7 +5668,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
|
|
|
4707
5668
|
* Digest on a local copy. This way, the state remains unaltered, and it can
|
|
4708
5669
|
* continue ingesting more input afterwards.
|
|
4709
5670
|
*/
|
|
4710
|
-
|
|
5671
|
+
XXH_memcpy(acc, state->acc, sizeof(state->acc));
|
|
4711
5672
|
if (state->bufferedSize >= XXH_STRIPE_LEN) {
|
|
4712
5673
|
size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
|
|
4713
5674
|
size_t nbStripesSoFar = state->nbStripesSoFar;
|
|
@@ -4715,7 +5676,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
|
|
|
4715
5676
|
&nbStripesSoFar, state->nbStripesPerBlock,
|
|
4716
5677
|
state->buffer, nbStripes,
|
|
4717
5678
|
secret, state->secretLimit,
|
|
4718
|
-
|
|
5679
|
+
XXH3_accumulate, XXH3_scrambleAcc);
|
|
4719
5680
|
/* last stripe */
|
|
4720
5681
|
XXH3_accumulate_512(acc,
|
|
4721
5682
|
state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
|
|
@@ -4724,16 +5685,16 @@ XXH3_digest_long (XXH64_hash_t* acc,
|
|
|
4724
5685
|
xxh_u8 lastStripe[XXH_STRIPE_LEN];
|
|
4725
5686
|
size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
|
|
4726
5687
|
XXH_ASSERT(state->bufferedSize > 0); /* there is always some input buffered */
|
|
4727
|
-
|
|
4728
|
-
|
|
5688
|
+
XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
|
|
5689
|
+
XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
|
|
4729
5690
|
XXH3_accumulate_512(acc,
|
|
4730
5691
|
lastStripe,
|
|
4731
5692
|
secret + state->secretLimit - XXH_SECRET_LASTACC_START);
|
|
4732
5693
|
}
|
|
4733
5694
|
}
|
|
4734
5695
|
|
|
4735
|
-
/*! @ingroup
|
|
4736
|
-
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
|
|
5696
|
+
/*! @ingroup XXH3_family */
|
|
5697
|
+
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
|
|
4737
5698
|
{
|
|
4738
5699
|
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
|
|
4739
5700
|
if (state->totalLen > XXH3_MIDSIZE_MAX) {
|
|
@@ -4744,57 +5705,12 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
|
|
|
4744
5705
|
(xxh_u64)state->totalLen * XXH_PRIME64_1);
|
|
4745
5706
|
}
|
|
4746
5707
|
/* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
|
|
4747
|
-
if (state->
|
|
5708
|
+
if (state->useSeed)
|
|
4748
5709
|
return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
|
|
4749
5710
|
return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
|
|
4750
5711
|
secret, state->secretLimit + XXH_STRIPE_LEN);
|
|
4751
5712
|
}
|
|
4752
|
-
|
|
4753
|
-
|
|
4754
|
-
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
|
|
4755
|
-
|
|
4756
|
-
/*! @ingroup xxh3_family */
|
|
4757
|
-
XXH_PUBLIC_API void
|
|
4758
|
-
XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSeedSize)
|
|
4759
|
-
{
|
|
4760
|
-
XXH_ASSERT(secretBuffer != NULL);
|
|
4761
|
-
if (customSeedSize == 0) {
|
|
4762
|
-
memcpy(secretBuffer, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
|
|
4763
|
-
return;
|
|
4764
|
-
}
|
|
4765
|
-
XXH_ASSERT(customSeed != NULL);
|
|
4766
|
-
|
|
4767
|
-
{ size_t const segmentSize = sizeof(XXH128_hash_t);
|
|
4768
|
-
size_t const nbSegments = XXH_SECRET_DEFAULT_SIZE / segmentSize;
|
|
4769
|
-
XXH128_canonical_t scrambler;
|
|
4770
|
-
XXH64_hash_t seeds[12];
|
|
4771
|
-
size_t segnb;
|
|
4772
|
-
XXH_ASSERT(nbSegments == 12);
|
|
4773
|
-
XXH_ASSERT(segmentSize * nbSegments == XXH_SECRET_DEFAULT_SIZE); /* exact multiple */
|
|
4774
|
-
XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
|
|
4775
|
-
|
|
4776
|
-
/*
|
|
4777
|
-
* Copy customSeed to seeds[], truncating or repeating as necessary.
|
|
4778
|
-
*/
|
|
4779
|
-
{ size_t toFill = XXH_MIN(customSeedSize, sizeof(seeds));
|
|
4780
|
-
size_t filled = toFill;
|
|
4781
|
-
memcpy(seeds, customSeed, toFill);
|
|
4782
|
-
while (filled < sizeof(seeds)) {
|
|
4783
|
-
toFill = XXH_MIN(filled, sizeof(seeds) - filled);
|
|
4784
|
-
memcpy((char*)seeds + filled, seeds, toFill);
|
|
4785
|
-
filled += toFill;
|
|
4786
|
-
} }
|
|
4787
|
-
|
|
4788
|
-
/* generate secret */
|
|
4789
|
-
memcpy(secretBuffer, &scrambler, sizeof(scrambler));
|
|
4790
|
-
for (segnb=1; segnb < nbSegments; segnb++) {
|
|
4791
|
-
size_t const segmentStart = segnb * segmentSize;
|
|
4792
|
-
XXH128_canonical_t segment;
|
|
4793
|
-
XXH128_canonicalFromHash(&segment,
|
|
4794
|
-
XXH128(&scrambler, sizeof(scrambler), XXH_readLE64(seeds + segnb) + segnb) );
|
|
4795
|
-
memcpy((char*)secretBuffer + segmentStart, &segment, sizeof(segment));
|
|
4796
|
-
} }
|
|
4797
|
-
}
|
|
5713
|
+
#endif /* !XXH_NO_STREAM */
|
|
4798
5714
|
|
|
4799
5715
|
|
|
4800
5716
|
/* ==========================================
|
|
@@ -4814,7 +5730,7 @@ XXH3_generateSecret(void* secretBuffer, const void* customSeed, size_t customSee
|
|
|
4814
5730
|
* fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
|
|
4815
5731
|
*/
|
|
4816
5732
|
|
|
4817
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
|
5733
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
|
4818
5734
|
XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
4819
5735
|
{
|
|
4820
5736
|
/* A doubled version of 1to3_64b with different constants. */
|
|
@@ -4843,7 +5759,7 @@ XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
|
|
|
4843
5759
|
}
|
|
4844
5760
|
}
|
|
4845
5761
|
|
|
4846
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
|
5762
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
|
4847
5763
|
XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
4848
5764
|
{
|
|
4849
5765
|
XXH_ASSERT(input != NULL);
|
|
@@ -4870,7 +5786,7 @@ XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
|
|
|
4870
5786
|
}
|
|
4871
5787
|
}
|
|
4872
5788
|
|
|
4873
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
|
5789
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
|
4874
5790
|
XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
4875
5791
|
{
|
|
4876
5792
|
XXH_ASSERT(input != NULL);
|
|
@@ -4945,7 +5861,7 @@ XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
|
|
|
4945
5861
|
/*
|
|
4946
5862
|
* Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
|
|
4947
5863
|
*/
|
|
4948
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
|
5864
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
|
4949
5865
|
XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
|
|
4950
5866
|
{
|
|
4951
5867
|
XXH_ASSERT(len <= 16);
|
|
@@ -4976,7 +5892,7 @@ XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
|
|
|
4976
5892
|
}
|
|
4977
5893
|
|
|
4978
5894
|
|
|
4979
|
-
XXH_FORCE_INLINE XXH128_hash_t
|
|
5895
|
+
XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
|
|
4980
5896
|
XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
4981
5897
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
|
4982
5898
|
XXH64_hash_t seed)
|
|
@@ -4987,6 +5903,16 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
4987
5903
|
{ XXH128_hash_t acc;
|
|
4988
5904
|
acc.low64 = len * XXH_PRIME64_1;
|
|
4989
5905
|
acc.high64 = 0;
|
|
5906
|
+
|
|
5907
|
+
#if XXH_SIZE_OPT >= 1
|
|
5908
|
+
{
|
|
5909
|
+
/* Smaller, but slightly slower. */
|
|
5910
|
+
unsigned int i = (unsigned int)(len - 1) / 32;
|
|
5911
|
+
do {
|
|
5912
|
+
acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
|
|
5913
|
+
} while (i-- != 0);
|
|
5914
|
+
}
|
|
5915
|
+
#else
|
|
4990
5916
|
if (len > 32) {
|
|
4991
5917
|
if (len > 64) {
|
|
4992
5918
|
if (len > 96) {
|
|
@@ -4997,6 +5923,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
4997
5923
|
acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
|
|
4998
5924
|
}
|
|
4999
5925
|
acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
|
|
5926
|
+
#endif
|
|
5000
5927
|
{ XXH128_hash_t h128;
|
|
5001
5928
|
h128.low64 = acc.low64 + acc.high64;
|
|
5002
5929
|
h128.high64 = (acc.low64 * XXH_PRIME64_1)
|
|
@@ -5009,7 +5936,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
5009
5936
|
}
|
|
5010
5937
|
}
|
|
5011
5938
|
|
|
5012
|
-
XXH_NO_INLINE XXH128_hash_t
|
|
5939
|
+
XXH_NO_INLINE XXH_PUREF XXH128_hash_t
|
|
5013
5940
|
XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
5014
5941
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
|
5015
5942
|
XXH64_hash_t seed)
|
|
@@ -5018,25 +5945,34 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
5018
5945
|
XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
|
|
5019
5946
|
|
|
5020
5947
|
{ XXH128_hash_t acc;
|
|
5021
|
-
|
|
5022
|
-
int i;
|
|
5948
|
+
unsigned i;
|
|
5023
5949
|
acc.low64 = len * XXH_PRIME64_1;
|
|
5024
5950
|
acc.high64 = 0;
|
|
5025
|
-
|
|
5951
|
+
/*
|
|
5952
|
+
* We set as `i` as offset + 32. We do this so that unchanged
|
|
5953
|
+
* `len` can be used as upper bound. This reaches a sweet spot
|
|
5954
|
+
* where both x86 and aarch64 get simple agen and good codegen
|
|
5955
|
+
* for the loop.
|
|
5956
|
+
*/
|
|
5957
|
+
for (i = 32; i < 160; i += 32) {
|
|
5026
5958
|
acc = XXH128_mix32B(acc,
|
|
5027
|
-
input +
|
|
5028
|
-
input +
|
|
5029
|
-
secret +
|
|
5959
|
+
input + i - 32,
|
|
5960
|
+
input + i - 16,
|
|
5961
|
+
secret + i - 32,
|
|
5030
5962
|
seed);
|
|
5031
5963
|
}
|
|
5032
5964
|
acc.low64 = XXH3_avalanche(acc.low64);
|
|
5033
5965
|
acc.high64 = XXH3_avalanche(acc.high64);
|
|
5034
|
-
|
|
5035
|
-
|
|
5966
|
+
/*
|
|
5967
|
+
* NB: `i <= len` will duplicate the last 32-bytes if
|
|
5968
|
+
* len % 32 was zero. This is an unfortunate necessity to keep
|
|
5969
|
+
* the hash result stable.
|
|
5970
|
+
*/
|
|
5971
|
+
for (i=160; i <= len; i += 32) {
|
|
5036
5972
|
acc = XXH128_mix32B(acc,
|
|
5037
|
-
input +
|
|
5038
|
-
input +
|
|
5039
|
-
secret + XXH3_MIDSIZE_STARTOFFSET +
|
|
5973
|
+
input + i - 32,
|
|
5974
|
+
input + i - 16,
|
|
5975
|
+
secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
|
|
5040
5976
|
seed);
|
|
5041
5977
|
}
|
|
5042
5978
|
/* last bytes */
|
|
@@ -5044,7 +5980,7 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
5044
5980
|
input + len - 16,
|
|
5045
5981
|
input + len - 32,
|
|
5046
5982
|
secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
|
|
5047
|
-
|
|
5983
|
+
(XXH64_hash_t)0 - seed);
|
|
5048
5984
|
|
|
5049
5985
|
{ XXH128_hash_t h128;
|
|
5050
5986
|
h128.low64 = acc.low64 + acc.high64;
|
|
@@ -5061,12 +5997,12 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
|
|
|
5061
5997
|
XXH_FORCE_INLINE XXH128_hash_t
|
|
5062
5998
|
XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
|
|
5063
5999
|
const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
|
|
5064
|
-
|
|
6000
|
+
XXH3_f_accumulate f_acc,
|
|
5065
6001
|
XXH3_f_scrambleAcc f_scramble)
|
|
5066
6002
|
{
|
|
5067
6003
|
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
|
|
5068
6004
|
|
|
5069
|
-
XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize,
|
|
6005
|
+
XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
|
|
5070
6006
|
|
|
5071
6007
|
/* converge into final hash */
|
|
5072
6008
|
XXH_STATIC_ASSERT(sizeof(acc) == 64);
|
|
@@ -5084,46 +6020,47 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
|
|
|
5084
6020
|
}
|
|
5085
6021
|
|
|
5086
6022
|
/*
|
|
5087
|
-
* It's important for performance that XXH3_hashLong is not inlined.
|
|
6023
|
+
* It's important for performance that XXH3_hashLong() is not inlined.
|
|
5088
6024
|
*/
|
|
5089
|
-
XXH_NO_INLINE XXH128_hash_t
|
|
6025
|
+
XXH_NO_INLINE XXH_PUREF XXH128_hash_t
|
|
5090
6026
|
XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
|
|
5091
6027
|
XXH64_hash_t seed64,
|
|
5092
6028
|
const void* XXH_RESTRICT secret, size_t secretLen)
|
|
5093
6029
|
{
|
|
5094
6030
|
(void)seed64; (void)secret; (void)secretLen;
|
|
5095
6031
|
return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
|
|
5096
|
-
|
|
6032
|
+
XXH3_accumulate, XXH3_scrambleAcc);
|
|
5097
6033
|
}
|
|
5098
6034
|
|
|
5099
6035
|
/*
|
|
5100
|
-
* It's important for performance
|
|
6036
|
+
* It's important for performance to pass @p secretLen (when it's static)
|
|
6037
|
+
* to the compiler, so that it can properly optimize the vectorized loop.
|
|
5101
6038
|
*/
|
|
5102
|
-
|
|
6039
|
+
XXH_FORCE_INLINE XXH128_hash_t
|
|
5103
6040
|
XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
|
|
5104
6041
|
XXH64_hash_t seed64,
|
|
5105
6042
|
const void* XXH_RESTRICT secret, size_t secretLen)
|
|
5106
6043
|
{
|
|
5107
6044
|
(void)seed64;
|
|
5108
6045
|
return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
|
|
5109
|
-
|
|
6046
|
+
XXH3_accumulate, XXH3_scrambleAcc);
|
|
5110
6047
|
}
|
|
5111
6048
|
|
|
5112
6049
|
XXH_FORCE_INLINE XXH128_hash_t
|
|
5113
6050
|
XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
|
|
5114
6051
|
XXH64_hash_t seed64,
|
|
5115
|
-
|
|
6052
|
+
XXH3_f_accumulate f_acc,
|
|
5116
6053
|
XXH3_f_scrambleAcc f_scramble,
|
|
5117
6054
|
XXH3_f_initCustomSecret f_initSec)
|
|
5118
6055
|
{
|
|
5119
6056
|
if (seed64 == 0)
|
|
5120
6057
|
return XXH3_hashLong_128b_internal(input, len,
|
|
5121
6058
|
XXH3_kSecret, sizeof(XXH3_kSecret),
|
|
5122
|
-
|
|
6059
|
+
f_acc, f_scramble);
|
|
5123
6060
|
{ XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
|
|
5124
6061
|
f_initSec(secret, seed64);
|
|
5125
6062
|
return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
|
|
5126
|
-
|
|
6063
|
+
f_acc, f_scramble);
|
|
5127
6064
|
}
|
|
5128
6065
|
}
|
|
5129
6066
|
|
|
@@ -5136,7 +6073,7 @@ XXH3_hashLong_128b_withSeed(const void* input, size_t len,
|
|
|
5136
6073
|
{
|
|
5137
6074
|
(void)secret; (void)secretLen;
|
|
5138
6075
|
return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
|
|
5139
|
-
|
|
6076
|
+
XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
|
|
5140
6077
|
}
|
|
5141
6078
|
|
|
5142
6079
|
typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
|
|
@@ -5166,88 +6103,94 @@ XXH3_128bits_internal(const void* input, size_t len,
|
|
|
5166
6103
|
|
|
5167
6104
|
/* === Public XXH128 API === */
|
|
5168
6105
|
|
|
5169
|
-
/*! @ingroup
|
|
5170
|
-
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
|
|
6106
|
+
/*! @ingroup XXH3_family */
|
|
6107
|
+
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
|
|
5171
6108
|
{
|
|
5172
6109
|
return XXH3_128bits_internal(input, len, 0,
|
|
5173
6110
|
XXH3_kSecret, sizeof(XXH3_kSecret),
|
|
5174
6111
|
XXH3_hashLong_128b_default);
|
|
5175
6112
|
}
|
|
5176
6113
|
|
|
5177
|
-
/*! @ingroup
|
|
6114
|
+
/*! @ingroup XXH3_family */
|
|
5178
6115
|
XXH_PUBLIC_API XXH128_hash_t
|
|
5179
|
-
XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
|
|
6116
|
+
XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
|
|
5180
6117
|
{
|
|
5181
6118
|
return XXH3_128bits_internal(input, len, 0,
|
|
5182
6119
|
(const xxh_u8*)secret, secretSize,
|
|
5183
6120
|
XXH3_hashLong_128b_withSecret);
|
|
5184
6121
|
}
|
|
5185
6122
|
|
|
5186
|
-
/*! @ingroup
|
|
6123
|
+
/*! @ingroup XXH3_family */
|
|
5187
6124
|
XXH_PUBLIC_API XXH128_hash_t
|
|
5188
|
-
XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
|
|
6125
|
+
XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
|
|
5189
6126
|
{
|
|
5190
6127
|
return XXH3_128bits_internal(input, len, seed,
|
|
5191
6128
|
XXH3_kSecret, sizeof(XXH3_kSecret),
|
|
5192
6129
|
XXH3_hashLong_128b_withSeed);
|
|
5193
6130
|
}
|
|
5194
6131
|
|
|
5195
|
-
/*! @ingroup
|
|
6132
|
+
/*! @ingroup XXH3_family */
|
|
6133
|
+
XXH_PUBLIC_API XXH128_hash_t
|
|
6134
|
+
XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
|
|
6135
|
+
{
|
|
6136
|
+
if (len <= XXH3_MIDSIZE_MAX)
|
|
6137
|
+
return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
|
|
6138
|
+
return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
|
|
6139
|
+
}
|
|
6140
|
+
|
|
6141
|
+
/*! @ingroup XXH3_family */
|
|
5196
6142
|
XXH_PUBLIC_API XXH128_hash_t
|
|
5197
|
-
XXH128(const void* input, size_t len, XXH64_hash_t seed)
|
|
6143
|
+
XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
|
|
5198
6144
|
{
|
|
5199
6145
|
return XXH3_128bits_withSeed(input, len, seed);
|
|
5200
6146
|
}
|
|
5201
6147
|
|
|
5202
6148
|
|
|
5203
6149
|
/* === XXH3 128-bit streaming === */
|
|
5204
|
-
|
|
6150
|
+
#ifndef XXH_NO_STREAM
|
|
5205
6151
|
/*
|
|
5206
|
-
* All
|
|
6152
|
+
* All initialization and update functions are identical to the 64-bit streaming variant.
|
|
5207
6153
|
* The only difference is the finalization routine.
|
|
5208
6154
|
*/
|
|
5209
6155
|
|
|
5210
|
-
/*! @ingroup
|
|
6156
|
+
/*! @ingroup XXH3_family */
|
|
5211
6157
|
XXH_PUBLIC_API XXH_errorcode
|
|
5212
|
-
XXH3_128bits_reset(XXH3_state_t* statePtr)
|
|
6158
|
+
XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
|
|
5213
6159
|
{
|
|
5214
|
-
|
|
5215
|
-
XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
|
|
5216
|
-
return XXH_OK;
|
|
6160
|
+
return XXH3_64bits_reset(statePtr);
|
|
5217
6161
|
}
|
|
5218
6162
|
|
|
5219
|
-
/*! @ingroup
|
|
6163
|
+
/*! @ingroup XXH3_family */
|
|
5220
6164
|
XXH_PUBLIC_API XXH_errorcode
|
|
5221
|
-
XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
|
|
6165
|
+
XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
|
|
5222
6166
|
{
|
|
5223
|
-
|
|
5224
|
-
XXH3_reset_internal(statePtr, 0, secret, secretSize);
|
|
5225
|
-
if (secret == NULL) return XXH_ERROR;
|
|
5226
|
-
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
|
|
5227
|
-
return XXH_OK;
|
|
6167
|
+
return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
|
|
5228
6168
|
}
|
|
5229
6169
|
|
|
5230
|
-
/*! @ingroup
|
|
6170
|
+
/*! @ingroup XXH3_family */
|
|
5231
6171
|
XXH_PUBLIC_API XXH_errorcode
|
|
5232
|
-
XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
|
|
6172
|
+
XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
|
|
5233
6173
|
{
|
|
5234
|
-
|
|
5235
|
-
|
|
5236
|
-
|
|
5237
|
-
|
|
5238
|
-
|
|
6174
|
+
return XXH3_64bits_reset_withSeed(statePtr, seed);
|
|
6175
|
+
}
|
|
6176
|
+
|
|
6177
|
+
/*! @ingroup XXH3_family */
|
|
6178
|
+
XXH_PUBLIC_API XXH_errorcode
|
|
6179
|
+
XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
|
|
6180
|
+
{
|
|
6181
|
+
return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
|
|
5239
6182
|
}
|
|
5240
6183
|
|
|
5241
|
-
/*! @ingroup
|
|
6184
|
+
/*! @ingroup XXH3_family */
|
|
5242
6185
|
XXH_PUBLIC_API XXH_errorcode
|
|
5243
|
-
XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
|
|
6186
|
+
XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
|
|
5244
6187
|
{
|
|
5245
6188
|
return XXH3_update(state, (const xxh_u8*)input, len,
|
|
5246
|
-
|
|
6189
|
+
XXH3_accumulate, XXH3_scrambleAcc);
|
|
5247
6190
|
}
|
|
5248
6191
|
|
|
5249
|
-
/*! @ingroup
|
|
5250
|
-
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
|
|
6192
|
+
/*! @ingroup XXH3_family */
|
|
6193
|
+
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
|
|
5251
6194
|
{
|
|
5252
6195
|
const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
|
|
5253
6196
|
if (state->totalLen > XXH3_MIDSIZE_MAX) {
|
|
@@ -5271,13 +6214,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
|
|
|
5271
6214
|
return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
|
|
5272
6215
|
secret, state->secretLimit + XXH_STRIPE_LEN);
|
|
5273
6216
|
}
|
|
5274
|
-
|
|
6217
|
+
#endif /* !XXH_NO_STREAM */
|
|
5275
6218
|
/* 128-bit utility functions */
|
|
5276
6219
|
|
|
5277
6220
|
#include <string.h> /* memcmp, memcpy */
|
|
5278
6221
|
|
|
5279
6222
|
/* return : 1 if equal, 0 if different */
|
|
5280
|
-
/*! @ingroup
|
|
6223
|
+
/*! @ingroup XXH3_family */
|
|
5281
6224
|
XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
|
|
5282
6225
|
{
|
|
5283
6226
|
/* note : XXH128_hash_t is compact, it has no padding byte */
|
|
@@ -5285,11 +6228,11 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
|
|
|
5285
6228
|
}
|
|
5286
6229
|
|
|
5287
6230
|
/* This prototype is compatible with stdlib's qsort().
|
|
5288
|
-
* return : >0 if *h128_1 > *h128_2
|
|
5289
|
-
*
|
|
5290
|
-
*
|
|
5291
|
-
/*! @ingroup
|
|
5292
|
-
XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
|
|
6231
|
+
* @return : >0 if *h128_1 > *h128_2
|
|
6232
|
+
* <0 if *h128_1 < *h128_2
|
|
6233
|
+
* =0 if *h128_1 == *h128_2 */
|
|
6234
|
+
/*! @ingroup XXH3_family */
|
|
6235
|
+
XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
|
|
5293
6236
|
{
|
|
5294
6237
|
XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
|
|
5295
6238
|
XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
|
|
@@ -5301,22 +6244,22 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
|
|
|
5301
6244
|
|
|
5302
6245
|
|
|
5303
6246
|
/*====== Canonical representation ======*/
|
|
5304
|
-
/*! @ingroup
|
|
6247
|
+
/*! @ingroup XXH3_family */
|
|
5305
6248
|
XXH_PUBLIC_API void
|
|
5306
|
-
XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
|
|
6249
|
+
XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
|
|
5307
6250
|
{
|
|
5308
6251
|
XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
|
|
5309
6252
|
if (XXH_CPU_LITTLE_ENDIAN) {
|
|
5310
6253
|
hash.high64 = XXH_swap64(hash.high64);
|
|
5311
6254
|
hash.low64 = XXH_swap64(hash.low64);
|
|
5312
6255
|
}
|
|
5313
|
-
|
|
5314
|
-
|
|
6256
|
+
XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
|
|
6257
|
+
XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
|
|
5315
6258
|
}
|
|
5316
6259
|
|
|
5317
|
-
/*! @ingroup
|
|
6260
|
+
/*! @ingroup XXH3_family */
|
|
5318
6261
|
XXH_PUBLIC_API XXH128_hash_t
|
|
5319
|
-
XXH128_hashFromCanonical(const XXH128_canonical_t* src)
|
|
6262
|
+
XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
|
|
5320
6263
|
{
|
|
5321
6264
|
XXH128_hash_t h;
|
|
5322
6265
|
h.high64 = XXH_readBE64(src);
|
|
@@ -5324,10 +6267,81 @@ XXH128_hashFromCanonical(const XXH128_canonical_t* src)
|
|
|
5324
6267
|
return h;
|
|
5325
6268
|
}
|
|
5326
6269
|
|
|
6270
|
+
|
|
6271
|
+
|
|
6272
|
+
/* ==========================================
|
|
6273
|
+
* Secret generators
|
|
6274
|
+
* ==========================================
|
|
6275
|
+
*/
|
|
6276
|
+
#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
|
|
6277
|
+
|
|
6278
|
+
XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
|
|
6279
|
+
{
|
|
6280
|
+
XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
|
|
6281
|
+
XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
|
|
6282
|
+
}
|
|
6283
|
+
|
|
6284
|
+
/*! @ingroup XXH3_family */
|
|
6285
|
+
XXH_PUBLIC_API XXH_errorcode
|
|
6286
|
+
XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
|
|
6287
|
+
{
|
|
6288
|
+
#if (XXH_DEBUGLEVEL >= 1)
|
|
6289
|
+
XXH_ASSERT(secretBuffer != NULL);
|
|
6290
|
+
XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
|
|
6291
|
+
#else
|
|
6292
|
+
/* production mode: assert() calls are disabled */
|
|
6293
|
+
if (secretBuffer == NULL) return XXH_ERROR;
|
|
6294
|
+
if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
|
|
6295
|
+
#endif
|
|
6296
|
+
|
|
6297
|
+
if (customSeedSize == 0) {
|
|
6298
|
+
customSeed = XXH3_kSecret;
|
|
6299
|
+
customSeedSize = XXH_SECRET_DEFAULT_SIZE;
|
|
6300
|
+
}
|
|
6301
|
+
#if (XXH_DEBUGLEVEL >= 1)
|
|
6302
|
+
XXH_ASSERT(customSeed != NULL);
|
|
6303
|
+
#else
|
|
6304
|
+
if (customSeed == NULL) return XXH_ERROR;
|
|
6305
|
+
#endif
|
|
6306
|
+
|
|
6307
|
+
/* Fill secretBuffer with a copy of customSeed - repeat as needed */
|
|
6308
|
+
{ size_t pos = 0;
|
|
6309
|
+
while (pos < secretSize) {
|
|
6310
|
+
size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
|
|
6311
|
+
memcpy((char*)secretBuffer + pos, customSeed, toCopy);
|
|
6312
|
+
pos += toCopy;
|
|
6313
|
+
} }
|
|
6314
|
+
|
|
6315
|
+
{ size_t const nbSeg16 = secretSize / 16;
|
|
6316
|
+
size_t n;
|
|
6317
|
+
XXH128_canonical_t scrambler;
|
|
6318
|
+
XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
|
|
6319
|
+
for (n=0; n<nbSeg16; n++) {
|
|
6320
|
+
XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
|
|
6321
|
+
XXH3_combine16((char*)secretBuffer + n*16, h128);
|
|
6322
|
+
}
|
|
6323
|
+
/* last segment */
|
|
6324
|
+
XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
|
|
6325
|
+
}
|
|
6326
|
+
return XXH_OK;
|
|
6327
|
+
}
|
|
6328
|
+
|
|
6329
|
+
/*! @ingroup XXH3_family */
|
|
6330
|
+
XXH_PUBLIC_API void
|
|
6331
|
+
XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
|
|
6332
|
+
{
|
|
6333
|
+
XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
|
|
6334
|
+
XXH3_initCustomSecret(secret, seed);
|
|
6335
|
+
XXH_ASSERT(secretBuffer != NULL);
|
|
6336
|
+
memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
|
|
6337
|
+
}
|
|
6338
|
+
|
|
6339
|
+
|
|
6340
|
+
|
|
5327
6341
|
/* Pop our optimization override from above */
|
|
5328
6342
|
#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
|
|
5329
6343
|
&& defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
|
|
5330
|
-
&& defined(__OPTIMIZE__) &&
|
|
6344
|
+
&& defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
|
|
5331
6345
|
# pragma GCC pop_options
|
|
5332
6346
|
#endif
|
|
5333
6347
|
|