rapidhash 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e8c9b8cb1fa7f46570ff6b03fe3a73a62206e3adfce2d94dcaa233b6743c3601
4
+ data.tar.gz: 8de28a980bbe539b0553e41f4adf91a7c77acba32521ce0c7a4530065037fb7f
5
+ SHA512:
6
+ metadata.gz: a7c108cd26cca846b9bfab8b9e860848732e26d67e1b524b484543a8c2e5438c8c906ecb679da31332d61fa8e0ee8219c3be6a447731864e929e86e195b5ad20
7
+ data.tar.gz: c6eb4964c4f67b8f6e5b65ed03143181beaf5fcc28216af3cc73d413c257a26c75b297a2151bca092781da356bd99f6985ce1809ff0e537ab8dad390a3dbd6fc
data/ext/Rakefile ADDED
@@ -0,0 +1,12 @@
1
# Rake tasks for packaging the gem and for compiling its native extension.
require "rake/extensiontask"

# Minimal specification handed to the packaging task below.
gem_spec = Gem::Specification.new do |gem|
  gem.name       = "rapidhash"
  gem.platform   = Gem::Platform::RUBY
  gem.extensions = FileList["ext/**/extconf.rb"]
end

# An empty block is still passed to Gem::PackageTask.new, exactly as before.
Gem::PackageTask.new(gem_spec) { |_pkg| }

Rake::ExtensionTask.new("rapidhash")
@@ -0,0 +1,3 @@
1
# Build configuration for the rapidhash extension, driven by mkmf.
require "mkmf"

# Write the configuration header first; rapidhash.c includes "extconf.h".
create_header

# Generate the Makefile for the extension named "rapidhash".
create_makefile "rapidhash"
@@ -0,0 +1,29 @@
1
+ #include "rapidhash.h"
2
+ #include "ruby.h"
3
+ #include "extconf.h"
4
+
5
+
6
+ #include <inttypes.h>
7
+
8
+ VALUE rapidhash_hash(VALUE self, VALUE data, VALUE seed)
9
+ {
10
+ // Unpack the data argument
11
+ Check_Type(data, T_STRING);
12
+ void* c_data = RSTRING_PTR(data);
13
+ size_t c_length = RSTRING_LEN(data);
14
+
15
+ // Unpack the seed argument
16
+ if (!FIXNUM_P(seed) && TYPE(seed) != T_BIGNUM) {
17
+ rb_raise(rb_eTypeError, "seed must be Numeric");
18
+ }
19
+ uint64_t c_seed = (uint64_t)NUM2ULL(seed);
20
+
21
+ return ULL2NUM(rapidhash_withSeed(c_data, c_length, c_seed));
22
+ }
23
+
24
+ void Init_rapidhash()
25
+ {
26
+ VALUE mod = rb_define_module("RapidHash");
27
+ rb_define_const(mod, "DEFAULT_SEED", ULL2NUM(RAPID_SEED));
28
+ rb_define_module_function(mod, "rapid_hash", &rapidhash_hash, 2);
29
+ }
@@ -0,0 +1,323 @@
1
+ /*
2
+ * rapidhash - Very fast, high quality, platform-independent hashing algorithm.
3
+ * Copyright (C) 2024 Nicolas De Carli
4
+ *
5
+ * Based on 'wyhash', by Wang Yi <godspeed_china@yeah.net>
6
+ *
7
+ * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
8
+ *
9
+ * Redistribution and use in source and binary forms, with or without
10
+ * modification, are permitted provided that the following conditions are
11
+ * met:
12
+ *
13
+ * * Redistributions of source code must retain the above copyright
14
+ * notice, this list of conditions and the following disclaimer.
15
+ * * Redistributions in binary form must reproduce the above
16
+ * copyright notice, this list of conditions and the following disclaimer
17
+ * in the documentation and/or other materials provided with the
18
+ * distribution.
19
+ *
20
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ *
32
+ * You can contact the author at:
33
+ * - rapidhash source repository: https://github.com/Nicoshev/rapidhash
34
+ */
35
+
36
+ /*
37
+ * Includes.
38
+ */
39
+ #include <stdint.h>
40
+ #include <string.h>
41
+ #if defined(_MSC_VER)
42
+ #include <intrin.h>
43
+ #if defined(_M_X64) && !defined(_M_ARM64EC)
44
+ #pragma intrinsic(_umul128)
45
+ #endif
46
+ #endif
47
+
48
/*
 * Language-dependent keywords.
 *
 * C++ builds use noexcept / constexpr / inline; C builds use an empty
 * RAPIDHASH_NOEXCEPT, "static const" and "static inline".
 * RAPIDHASH_INLINE is only defined here when it is not defined already, so
 * it can be overridden to be stronger than a hint, e.g. by adding
 * __attribute__((always_inline)).
 */
#if defined(__cplusplus)
#  define RAPIDHASH_NOEXCEPT noexcept
#  define RAPIDHASH_CONSTEXPR constexpr
#  if !defined(RAPIDHASH_INLINE)
#    define RAPIDHASH_INLINE inline
#  endif
#else
#  define RAPIDHASH_NOEXCEPT
#  define RAPIDHASH_CONSTEXPR static const
#  if !defined(RAPIDHASH_INLINE)
#    define RAPIDHASH_INLINE static inline
#  endif
#endif

/*
 * Protection mode, which alters the behaviour of rapid_mum.
 *
 *   RAPIDHASH_FAST       Normal behaviour, max speed. Selected unless
 *                        RAPIDHASH_PROTECTED is defined.
 *   RAPIDHASH_PROTECTED  Extra protection against entropy loss.
 *
 * Defining both is rejected at compile time.
 */
#if !defined(RAPIDHASH_PROTECTED)
#  define RAPIDHASH_FAST
#elif defined(RAPIDHASH_FAST)
#  error "cannot define RAPIDHASH_PROTECTED and RAPIDHASH_FAST simultaneously."
#endif

/*
 * Loop shape of the main hash function.
 *
 *   RAPIDHASH_COMPACT   Legacy variant, each loop iteration processes 48 bytes.
 *   RAPIDHASH_UNROLLED  Unrolled variant, each loop iteration processes 96
 *                       bytes. Selected unless RAPIDHASH_COMPACT is defined.
 *
 * Most modern CPUs should benefit from RAPIDHASH_UNROLLED.
 * These macros do not alter the output hash.
 */
#if !defined(RAPIDHASH_COMPACT)
#  define RAPIDHASH_UNROLLED
#elif defined(RAPIDHASH_UNROLLED)
#  error "cannot define RAPIDHASH_COMPACT and RAPIDHASH_UNROLLED simultaneously."
#endif

/*
 * Branch hints: mapped to __builtin_expect on GCC, ICC and clang, and to the
 * plain expression everywhere else.
 */
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
#  define _likely_(x) __builtin_expect(x,1)
#  define _unlikely_(x) __builtin_expect(x,0)
#else
#  define _likely_(x) (x)
#  define _unlikely_(x) (x)
#endif

/*
 * Endianness detection, skipped when RAPIDHASH_LITTLE_ENDIAN is already
 * defined. When nothing can be detected a warning is emitted and little
 * endian is assumed.
 */
#if !defined(RAPIDHASH_LITTLE_ENDIAN)
#  if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#    define RAPIDHASH_LITTLE_ENDIAN
#  elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#    define RAPIDHASH_BIG_ENDIAN
#  else
#    warning "could not determine endianness! Falling back to little endian."
#    define RAPIDHASH_LITTLE_ENDIAN
#  endif
#endif

/*
 * Seed used by rapidhash() when the caller does not supply one.
 */
#define RAPID_SEED (0xbdd89aa982704029ull)

/*
 * Secret triplet used by rapidhash_withSeed().
 */
RAPIDHASH_CONSTEXPR uint64_t rapid_secret[3] = {
  0x2d358dccaa6c78a5ull,
  0x8bb84b93962eacc9ull,
  0x4b33a62ed433d4a3ull
};
129
+
130
+ /*
131
+ * 64*64 -> 128bit multiply function.
132
+ *
133
+ * @param A Address of 64-bit number.
134
+ * @param B Address of 64-bit number.
135
+ *
136
+ * Calculates 128-bit C = *A * *B.
137
+ *
138
+ * When RAPIDHASH_FAST is defined:
139
+ * Overwrites A contents with C's low 64 bits.
140
+ * Overwrites B contents with C's high 64 bits.
141
+ *
142
+ * When RAPIDHASH_PROTECTED is defined:
143
+ * Xors and overwrites A contents with C's low 64 bits.
144
+ * Xors and overwrites B contents with C's high 64 bits.
145
+ */
146
+ RAPIDHASH_INLINE void rapid_mum(uint64_t *A, uint64_t *B) RAPIDHASH_NOEXCEPT {
147
+ #if defined(__SIZEOF_INT128__)
148
+ __uint128_t r=*A; r*=*B;
149
+ #ifdef RAPIDHASH_PROTECTED
150
+ *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
151
+ #else
152
+ *A=(uint64_t)r; *B=(uint64_t)(r>>64);
153
+ #endif
154
+ #elif defined(_MSC_VER) && (defined(_WIN64) || defined(_M_HYBRID_CHPE_ARM64))
155
+ #if defined(_M_X64)
156
+ #ifdef RAPIDHASH_PROTECTED
157
+ uint64_t a, b;
158
+ a=_umul128(*A,*B,&b);
159
+ *A^=a; *B^=b;
160
+ #else
161
+ *A=_umul128(*A,*B,B);
162
+ #endif
163
+ #else
164
+ #ifdef RAPIDHASH_PROTECTED
165
+ uint64_t a, b;
166
+ b = __umulh(*A, *B);
167
+ a = *A * *B;
168
+ *A^=a; *B^=b;
169
+ #else
170
+ uint64_t c = __umulh(*A, *B);
171
+ *A = *A * *B;
172
+ *B = c;
173
+ #endif
174
+ #endif
175
+ #else
176
+ uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;
177
+ uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;
178
+ lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;
179
+ #ifdef RAPIDHASH_PROTECTED
180
+ *A^=lo; *B^=hi;
181
+ #else
182
+ *A=lo; *B=hi;
183
+ #endif
184
+ #endif
185
+ }
186
+
187
+ /*
188
+ * Multiply and xor mix function.
189
+ *
190
+ * @param A 64-bit number.
191
+ * @param B 64-bit number.
192
+ *
193
+ * Calculates 128-bit C = A * B.
194
+ * Returns 64-bit xor between high and low 64 bits of C.
195
+ */
196
+ RAPIDHASH_INLINE uint64_t rapid_mix(uint64_t A, uint64_t B) RAPIDHASH_NOEXCEPT { rapid_mum(&A,&B); return A^B; }
197
+
198
+ /*
199
+ * Read functions.
200
+ */
201
+ #ifdef RAPIDHASH_LITTLE_ENDIAN
202
+ RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return v;}
203
+ RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return v;}
204
+ #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
205
+ RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return __builtin_bswap64(v);}
206
+ RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return __builtin_bswap32(v);}
207
+ #elif defined(_MSC_VER)
208
+ RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint64_t v; memcpy(&v, p, sizeof(uint64_t)); return _byteswap_uint64(v);}
209
+ RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT { uint32_t v; memcpy(&v, p, sizeof(uint32_t)); return _byteswap_ulong(v);}
210
+ #else
211
+ RAPIDHASH_INLINE uint64_t rapid_read64(const uint8_t *p) RAPIDHASH_NOEXCEPT {
212
+ uint64_t v; memcpy(&v, p, 8);
213
+ return (((v >> 56) & 0xff)| ((v >> 40) & 0xff00)| ((v >> 24) & 0xff0000)| ((v >> 8) & 0xff000000)| ((v << 8) & 0xff00000000)| ((v << 24) & 0xff0000000000)| ((v << 40) & 0xff000000000000)| ((v << 56) & 0xff00000000000000));
214
+ }
215
+ RAPIDHASH_INLINE uint64_t rapid_read32(const uint8_t *p) RAPIDHASH_NOEXCEPT {
216
+ uint32_t v; memcpy(&v, p, 4);
217
+ return (((v >> 24) & 0xff)| ((v >> 8) & 0xff00)| ((v << 8) & 0xff0000)| ((v << 24) & 0xff000000));
218
+ }
219
+ #endif
220
+
221
+ /*
222
+ * Reads and combines 3 bytes of input.
223
+ *
224
+ * @param p Buffer to read from.
225
+ * @param k Length of @p, in bytes.
226
+ *
227
+ * Always reads and combines 3 bytes from memory.
228
+ * Guarantees to read each buffer position at least once.
229
+ *
230
+ * Returns a 64-bit value containing all three bytes read.
231
+ */
232
+ RAPIDHASH_INLINE uint64_t rapid_readSmall(const uint8_t *p, size_t k) RAPIDHASH_NOEXCEPT { return (((uint64_t)p[0])<<56)|(((uint64_t)p[k>>1])<<32)|p[k-1];}
233
+
234
+ /*
235
+ * rapidhash main function.
236
+ *
237
+ * @param key Buffer to be hashed.
238
+ * @param len @key length, in bytes.
239
+ * @param seed 64-bit seed used to alter the hash result predictably.
240
+ * @param secret Triplet of 64-bit secrets used to alter hash result predictably.
241
+ *
242
+ * Returns a 64-bit hash.
243
+ */
244
+ RAPIDHASH_INLINE uint64_t rapidhash_internal(const void *key, size_t len, uint64_t seed, const uint64_t* secret) RAPIDHASH_NOEXCEPT {
245
+ const uint8_t *p=(const uint8_t *)key; seed^=rapid_mix(seed^secret[0],secret[1])^len; uint64_t a, b;
246
+ if(_likely_(len<=16)){
247
+ if(_likely_(len>=4)){
248
+ const uint8_t * plast = p + len - 4;
249
+ a = (rapid_read32(p) << 32) | rapid_read32(plast);
250
+ const uint64_t delta = ((len&24)>>(len>>3));
251
+ b = ((rapid_read32(p + delta) << 32) | rapid_read32(plast - delta)); }
252
+ else if(_likely_(len>0)){ a=rapid_readSmall(p,len); b=0;}
253
+ else a=b=0;
254
+ }
255
+ else{
256
+ size_t i=len;
257
+ if(_unlikely_(i>48)){
258
+ uint64_t see1=seed, see2=seed;
259
+ #ifdef RAPIDHASH_UNROLLED
260
+ while(_likely_(i>=96)){
261
+ seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
262
+ see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
263
+ see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
264
+ seed=rapid_mix(rapid_read64(p+48)^secret[0],rapid_read64(p+56)^seed);
265
+ see1=rapid_mix(rapid_read64(p+64)^secret[1],rapid_read64(p+72)^see1);
266
+ see2=rapid_mix(rapid_read64(p+80)^secret[2],rapid_read64(p+88)^see2);
267
+ p+=96; i-=96;
268
+ }
269
+ if(_unlikely_(i>=48)){
270
+ seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
271
+ see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
272
+ see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
273
+ p+=48; i-=48;
274
+ }
275
+ #else
276
+ do {
277
+ seed=rapid_mix(rapid_read64(p)^secret[0],rapid_read64(p+8)^seed);
278
+ see1=rapid_mix(rapid_read64(p+16)^secret[1],rapid_read64(p+24)^see1);
279
+ see2=rapid_mix(rapid_read64(p+32)^secret[2],rapid_read64(p+40)^see2);
280
+ p+=48; i-=48;
281
+ } while (_likely_(i>=48));
282
+ #endif
283
+ seed^=see1^see2;
284
+ }
285
+ if(i>16){
286
+ seed=rapid_mix(rapid_read64(p)^secret[2],rapid_read64(p+8)^seed^secret[1]);
287
+ if(i>32)
288
+ seed=rapid_mix(rapid_read64(p+16)^secret[2],rapid_read64(p+24)^seed);
289
+ }
290
+ a=rapid_read64(p+i-16); b=rapid_read64(p+i-8);
291
+ }
292
+ a^=secret[1]; b^=seed; rapid_mum(&a,&b);
293
+ return rapid_mix(a^secret[0]^len,b^secret[1]);
294
+ }
295
+
296
+ /*
297
+ * rapidhash default seeded hash function.
298
+ *
299
+ * @param key Buffer to be hashed.
300
+ * @param len @key length, in bytes.
301
+ * @param seed 64-bit seed used to alter the hash result predictably.
302
+ *
303
+ * Calls rapidhash_internal using provided parameters and default secrets.
304
+ *
305
+ * Returns a 64-bit hash.
306
+ */
307
+ RAPIDHASH_INLINE uint64_t rapidhash_withSeed(const void *key, size_t len, uint64_t seed) RAPIDHASH_NOEXCEPT {
308
+ return rapidhash_internal(key, len, seed, rapid_secret);
309
+ }
310
+
311
+ /*
312
+ * rapidhash default hash function.
313
+ *
314
+ * @param key Buffer to be hashed.
315
+ * @param len @key length, in bytes.
316
+ *
317
+ * Calls rapidhash_withSeed using provided parameters and the default seed.
318
+ *
319
+ * Returns a 64-bit hash.
320
+ */
321
+ RAPIDHASH_INLINE uint64_t rapidhash(const void *key, size_t len) RAPIDHASH_NOEXCEPT {
322
+ return rapidhash_withSeed(key, len, RAPID_SEED);
323
+ }
data/lib/rapidhash.so ADDED
Binary file
data/rapidhash.gemspec ADDED
@@ -0,0 +1,16 @@
1
require 'fileutils'

# Gem specification for rapidhash: a platform-"ruby" (source) gem whose native
# extension is configured by ext/rapidhash/extconf.rb.
Gem::Specification.new do |s|
  s.name = 'rapidhash'
  s.version = '0.0.1'
  s.licenses = ['MIT']
  s.summary = "FFI wrapper for the rapidhash hash function"
  s.description = "This gem wraps rapidhash (https://github.com/Nicoshev/rapidhash/tree/master), the successor to wyhash and the fastest hash function passing all tests in SMHasher3."
  s.authors = ['Praneeth Sadda']
  s.email = 'psadda@gmail.com'

  # Package sources only. The glob would otherwise pick up locally compiled
  # artifacts such as lib/rapidhash.so; the gem declares an extension to build
  # (s.extensions below), so a binary built on the packaging machine should
  # not be shipped inside the source gem.
  build_artifacts = %w[.so .bundle .dll .dylib .o .obj]
  sources = Dir.glob("{lib,spec,ext}/**/*").reject do |path|
    build_artifacts.include?(File.extname(path))
  end
  s.files = %w(rapidhash.gemspec) + sources

  s.extensions = ['ext/rapidhash/extconf.rb']
  s.required_ruby_version = '>= 3.0.0'
  s.add_development_dependency 'rake'
  # ext/Rakefile requires 'rake/extensiontask', which is provided by rake-compiler.
  s.add_development_dependency 'rake-compiler'
  s.metadata = { 'source_code_uri' => 'https://github.com/psadda/rapidhash-ruby' }
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rapidhash
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Praneeth Sadda
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-12-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ description: This gem wraps rapidhash (https://github.com/Nicoshev/rapidhash/tree/master),
28
+ the successor to wyhash and the fastest hash function passing all tests in SMHasher3.
29
+ email: psadda@gmail.com
30
+ executables: []
31
+ extensions:
32
+ - ext/rapidhash/extconf.rb
33
+ extra_rdoc_files: []
34
+ files:
35
+ - ext/Rakefile
36
+ - ext/rapidhash/extconf.rb
37
+ - ext/rapidhash/rapidhash.c
38
+ - ext/rapidhash/rapidhash.h
39
+ - lib/rapidhash.so
40
+ - rapidhash.gemspec
41
+ homepage:
42
+ licenses:
43
+ - MIT
44
+ metadata:
45
+ source_code_uri: https://github.com/psadda/rapidhash-ruby
46
+ post_install_message:
47
+ rdoc_options: []
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 3.0.0
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubygems_version: 3.5.23
62
+ signing_key:
63
+ specification_version: 4
64
+ summary: FFI wrapper for the rapidhash hash function
65
+ test_files: []