murmurhash3 0.1.1
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/ext/murmurhash3/extconf.rb +8 -0
- data/ext/murmurhash3/murmur3.c +441 -0
- data/lib/murmurhash3.rb +16 -0
- data/lib/murmurhash3/pure_ruby.rb +155 -0
- data/lib/murmurhash3/version.rb +3 -0
- data/test/test_murmur.rb +76 -0
- metadata +54 -0
data/ext/murmurhash3/murmur3.c
ADDED
@@ -0,0 +1,441 @@
#include <ruby.h>
/*-----------------------------------------------------------------------------
 * MurmurHash3 was written by Austin Appleby, and is placed in the public
 * domain. The author hereby disclaims copyright to this source code.

 * Note - The x86 and x64 versions do _not_ produce the same results, as the
 * algorithms are optimized for their respective platforms. You can still
 * compile and run any of them on any platform, but your performance with the
 * non-native version will be less than optimal.
 */

typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
#if SIZEOF_LONG == 8
typedef unsigned long uint64_t;
#else
typedef unsigned long long uint64_t;
#endif

/*-----------------------------------------------------------------------------
 * Platform-specific functions and macros
 */

#ifdef __GNUC__
#define FORCE_INLINE __attribute__((always_inline))
#elif defined(_MSC_VER)
#define FORCE_INLINE __forceinline
#else
#define FORCE_INLINE
#endif

#if defined(_MSC_VER)

#define ROTL32(x,y) _rotl(x,y)
#define ROTL64(x,y) _rotl64(x,y)

#define BIG_CONSTANT(x) (x)

#else

static inline FORCE_INLINE uint32_t
rotl32 ( uint32_t x, int8_t r )
{
  return (x << r) | (x >> (32 - r));
}

static inline FORCE_INLINE uint64_t
rotl64 ( uint64_t x, int8_t r )
{
  return (x << r) | (x >> (64 - r));
}

#define ROTL32(x,y) rotl32(x,y)
#define ROTL64(x,y) rotl64(x,y)

#define BIG_CONSTANT(x) (x##LLU)
#endif

/* end platform specific */

/* Block read - if your platform needs to do endian-swapping or can only
 * handle aligned reads, do the conversion here */
#ifdef WORDS_BIGENDIAN
#if GCC_VERSION_SINCE(4,3,0)
# define swap32(x) __builtin_bswap32(x)
# define swap64(x) __builtin_bswap64(x)
#endif

#ifndef swap32
# define swap32(x) ((((x)&0xFF)<<24) \
                   |(((x)>>24)&0xFF) \
                   |(((x)&0x0000FF00)<<8) \
                   |(((x)&0x00FF0000)>>8) )
#endif

#ifndef swap64
# ifdef HAVE_INT64_T
static inline FORCE_INLINE uint64_t
swap64(uint64_t x) {
  x = (x>>32) | (x << 32);
  x = ((x & BIG_CONSTANT(0xFFFF0000FFFF0000)) >> 16) |
      ((x & BIG_CONSTANT(0x0000FFFF0000FFFF)) << 16);
  return ((x & BIG_CONSTANT(0xFF00FF00FF00FF00)) >> 8) |
         ((x & BIG_CONSTANT(0x00FF00FF00FF00FF)) << 8);
}
# endif

#endif
static inline FORCE_INLINE uint32_t
getblock32(const uint32_t * p, int i)
{
  return swap32(p[i]);
}

static inline FORCE_INLINE uint64_t
getblock64(const uint64_t * p, int i)
{
  return swap64(p[i]);
}
#else
#define getblock32(p, i) (p[i])
#define getblock64(p, i) (p[i])
#endif

/* Finalization mix - force all bits of a hash block to avalanche */

static inline FORCE_INLINE uint32_t
fmix32 ( uint32_t h )
{
  h ^= h >> 16;
  h *= 0x85ebca6b;
  h ^= h >> 13;
  h *= 0xc2b2ae35;
  h ^= h >> 16;

  return h;
}

static inline FORCE_INLINE uint64_t
fmix64 ( uint64_t k )
{
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  k ^= k >> 33;
  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  k ^= k >> 33;

  return k;
}

static inline FORCE_INLINE uint32_t
mmix32(uint32_t k1)
{
  k1 *= 0xcc9e2d51;
  k1 = ROTL32(k1, 15);
  return k1 * 0x1b873593;
}

static uint32_t
MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed)
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 4;
  int i;

  uint32_t h1 = seed;
  uint32_t k1 = 0;


  /* body */

  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);

  for(i = -nblocks; i; i++)
  {
    h1 ^= mmix32(getblock32(blocks, i));
    h1 = ROTL32(h1,13);
    h1 = h1*5+0xe6546b64;
  }

  /* tail */

  data += nblocks*4;

  switch(len & 3)
  {
  case 3: k1 ^= data[2] << 16;
  case 2: k1 ^= data[1] << 8;
  case 1: k1 ^= data[0];
          h1 ^= mmix32(k1);
  };

  /* finalization */

  h1 ^= len;

  h1 = fmix32(h1);

  return h1;
}

#define C1_128 BIG_CONSTANT(0x87c37b91114253d5)
#define C2_128 BIG_CONSTANT(0x4cf5ad432745937f)

static inline FORCE_INLINE uint64_t
mmix128_1(uint64_t k1)
{
  k1 *= C1_128;
  k1 = ROTL64(k1, 31);
  return k1 * C2_128;
}

static inline FORCE_INLINE uint64_t
mmix128_2(uint64_t k2)
{
  k2 *= C2_128;
  k2 = ROTL64(k2, 33);
  return k2 * C1_128;
}

static void MurmurHash3_x64_128 ( const void * key, const int len,
                                  const uint32_t seed, void * out )
{
  const uint8_t * data = (const uint8_t*)key;
  const int nblocks = len / 16;
  int i;

  uint64_t h1 = seed;
  uint64_t h2 = seed;
  uint64_t k1 = 0, k2 = 0;

  /* body */

  const uint64_t * blocks = (const uint64_t *)(data);

  for(i = 0; i < nblocks; i++)
  {
    k1 = getblock64(blocks, i*2+0);
    k2 = getblock64(blocks, i*2+1);

    h1 ^= mmix128_1(k1);
    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;

    h2 ^= mmix128_2(k2);
    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  }

  /* tail */

  data += nblocks*16;
  k1 = k2 = 0;

  switch(len & 15)
  {
  case 15: k2 ^= (uint64_t)(data[14]) << 48;
  case 14: k2 ^= (uint64_t)(data[13]) << 40;
  case 13: k2 ^= (uint64_t)(data[12]) << 32;
  case 12: k2 ^= (uint64_t)(data[11]) << 24;
  case 11: k2 ^= (uint64_t)(data[10]) << 16;
  case 10: k2 ^= (uint64_t)(data[ 9]) << 8;
  case  9: k2 ^= (uint64_t)(data[ 8]) << 0;
           h2 ^= mmix128_2(k2);

  case  8: k1 ^= (uint64_t)(data[ 7]) << 56;
  case  7: k1 ^= (uint64_t)(data[ 6]) << 48;
  case  6: k1 ^= (uint64_t)(data[ 5]) << 40;
  case  5: k1 ^= (uint64_t)(data[ 4]) << 32;
  case  4: k1 ^= (uint64_t)(data[ 3]) << 24;
  case  3: k1 ^= (uint64_t)(data[ 2]) << 16;
  case  2: k1 ^= (uint64_t)(data[ 1]) << 8;
  case  1: k1 ^= (uint64_t)(data[ 0]) << 0;
           h1 ^= mmix128_1(k1);
  };

  /* finalization */

  h1 ^= len; h2 ^= len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t*)out)[0] = h1;
  ((uint64_t*)out)[1] = h2;
}

/* end of MurmurHash3 algorithm */

static VALUE
rb_fmix32(VALUE self, VALUE integer)
{
  uint32_t _int = NUM2UINT(integer);
  return UINT2NUM(fmix32(_int));
}

static VALUE
rb_fmix64(VALUE self, VALUE integer)
{
#if SIZEOF_LONG == 8
  uint64_t _int = NUM2ULONG(integer);
  return ULONG2NUM(fmix64(_int));
#else
  uint64_t _int = NUM2ULL(integer);
  return ULL2NUM(fmix64(_int));
#endif
}

static VALUE
rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
{
  VALUE rstr, rseed;
  uint32_t result;

  rb_scan_args(argc, argv, "11", &rstr, &rseed);

  result = MurmurHash3_x86_32(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(rseed));

  return UINT2NUM(result);
}

static VALUE
rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
{
  VALUE rint, rseed;
  uint32_t _int;
  uint32_t result;

  rb_scan_args(argc, argv, "11", &rint, &rseed);
  _int = NUM2UINT(rint);

  result = MurmurHash3_x86_32(&_int, 4, argc == 1 ? 0 : NUM2UINT(rseed));

  return UINT2NUM(result);
}

static VALUE
rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
{
  VALUE rint, rseed;
  uint64_t _int;
  uint32_t result;

  rb_scan_args(argc, argv, "11", &rint, &rseed);
#if SIZEOF_LONG == 8
  _int = NUM2ULONG(rint);
#else
  _int = NUM2ULL(rint);
#endif

  result = MurmurHash3_x86_32(&_int, 8, argc == 1 ? 0 : NUM2UINT(rseed));

  return UINT2NUM(result);
}

#define PREPARE_128_BIT() \
  VALUE rstr, rseed, ar_result; \
  uint32_t result[4]; \
  rb_scan_args(argc, argv, "11", &rstr, &rseed)

#define SWAP_128_BIT() do { \
  uint32_t tmp; \
  tmp = result[0]; \
  result[0] = result[1]; \
  result[1] = tmp; \
  tmp = result[2]; \
  result[2] = result[3]; \
  result[3] = tmp; \
} while (0)

#define RETURN_128_BIT() \
  ar_result = rb_ary_new2(4); \
  rb_ary_push(ar_result, UINT2NUM(result[0])); \
  rb_ary_push(ar_result, UINT2NUM(result[1])); \
  rb_ary_push(ar_result, UINT2NUM(result[2])); \
  rb_ary_push(ar_result, UINT2NUM(result[3])); \
  return ar_result

static VALUE
rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
{
  PREPARE_128_BIT();

  MurmurHash3_x64_128(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(rseed), result);
#if WORDS_BIGENDIAN
  SWAP_128_BIT();
#endif
  RETURN_128_BIT();
}

static VALUE
rb_murmur3_128_int32_hash(int argc, VALUE* argv, VALUE self)
{
  PREPARE_128_BIT();

  {
    uint32_t _int = NUM2UINT(rstr);
    MurmurHash3_x64_128(&_int, 4, argc == 1 ? 0 : NUM2UINT(rseed), result);
  }
#if WORDS_BIGENDIAN
  SWAP_128_BIT();
#endif
  RETURN_128_BIT();
}

static VALUE
rb_murmur3_128_int64_hash(int argc, VALUE* argv, VALUE self)
{
  PREPARE_128_BIT();

  {
#if SIZEOF_LONG == 8
    uint64_t _int = NUM2ULONG(rstr);
#else
    uint64_t _int = NUM2ULL(rstr);
#endif
    MurmurHash3_x64_128(&_int, 8, argc == 1 ? 0 : NUM2UINT(rseed), result);
  }
#if WORDS_BIGENDIAN
  SWAP_128_BIT();
#endif
  RETURN_128_BIT();
}

void
Init_native_murmur() {
  VALUE singleton;
  VALUE mod_murmur = rb_define_module("MurmurHash3");
  VALUE mod_murmur32 = rb_define_module_under(mod_murmur, "Native32");
  VALUE mod_murmur128 = rb_define_module_under(mod_murmur, "Native128");

  rb_define_method(mod_murmur32, "murmur3_32_fmix", rb_fmix32, 1);
  rb_define_method(mod_murmur32, "murmur3_32_str_hash", rb_murmur3_32_str_hash, -1);
  rb_define_method(mod_murmur32, "murmur3_32_int32_hash", rb_murmur3_32_int32_hash, -1);
  rb_define_method(mod_murmur32, "murmur3_32_int64_hash", rb_murmur3_32_int64_hash, -1);

  rb_extend_object(mod_murmur32, mod_murmur32);
  singleton = rb_singleton_class(mod_murmur32);
  rb_define_alias(singleton, "fmix", "murmur3_32_fmix");
  rb_define_alias(singleton, "str_hash", "murmur3_32_str_hash");
  rb_define_alias(singleton, "int32_hash", "murmur3_32_int32_hash");
  rb_define_alias(singleton, "int64_hash", "murmur3_32_int64_hash");


  rb_define_method(mod_murmur128, "murmur3_128_fmix", rb_fmix64, 1);
  rb_define_method(mod_murmur128, "murmur3_128_str_hash", rb_murmur3_128_str_hash, -1);
  rb_define_method(mod_murmur128, "murmur3_128_int32_hash", rb_murmur3_128_int32_hash, -1);
  rb_define_method(mod_murmur128, "murmur3_128_int64_hash", rb_murmur3_128_int64_hash, -1);

  rb_extend_object(mod_murmur128, mod_murmur128);
  singleton = rb_singleton_class(mod_murmur128);
  rb_define_alias(singleton, "fmix", "murmur3_128_fmix");
  rb_define_alias(singleton, "str_hash", "murmur3_128_str_hash");
  rb_define_alias(singleton, "int32_hash", "murmur3_128_int32_hash");
  rb_define_alias(singleton, "int64_hash", "murmur3_128_int64_hash");

}
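For orientation, a minimal usage sketch of the native binding registered by Init_native_murmur above. The expected values are taken from data/test/test_murmur.rb further down; the sketch assumes the C extension compiled successfully:

    require 'murmurhash3/native_murmur'

    # Module-level aliases defined in Init_native_murmur:
    MurmurHash3::Native32.str_hash('asdfqwer', 0)   # => 0xa46b5209
    MurmurHash3::Native32.fmix(1717859169)          # => 0x17561734
    # The 128-bit hash comes back as four 32-bit words:
    MurmurHash3::Native128.str_hash('asdfqwer', 0)  # => [0xd6d7d367, 0xcb41f064, 0x8973cd72, 0xc345e72e]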
data/lib/murmurhash3.rb
ADDED
@@ -0,0 +1,16 @@
require "murmurhash3/version"

module MurmurHash3
  begin
    require 'murmurhash3/native_murmur'
    V32 = Native32
    V128 = Native128
  rescue LoadError
    require 'murmurhash3/pure_ruby'
    if RUBY_ENGINE == 'ruby'
      $stderr.puts "Attention: used pure ruby version of MurmurHash3"
    end
    V32 = PureRuby32
    V128 = PureRuby128
  end
end
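A sketch of how the V32/V128 constants above resolve in practice; which module backs each constant depends on whether the native extension loaded, but the API is the same either way (expected values again from the test file):

    require 'murmurhash3'

    # Native32/Native128 when the C extension is available,
    # PureRuby32/PureRuby128 otherwise.
    MurmurHash3::V32.str_hash('asdf')       # => 0x1b20e026
    MurmurHash3::V128.str_hash('asdfqwer')  # => [0xd6d7d367, 0xcb41f064, 0x8973cd72, 0xc345e72e]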
data/lib/murmurhash3/pure_ruby.rb
ADDED
@@ -0,0 +1,155 @@
require 'digest'
module MurmurHash3
  module PureRuby32
    MASK32 = 0xffffffff
    def murmur3_32_rotl(x, r)
      ((x << r) | (x >> (32 - r))) & MASK32
    end


    def murmur3_32_fmix(h)
      h &= MASK32
      h ^= h >> 16
      h = (h * 0x85ebca6b) & MASK32
      h ^= h >> 13
      h = (h * 0xc2b2ae35) & MASK32
      h ^ (h >> 16)
    end

    def murmur3_32__mmix(k1)
      k1 = (k1 * 0xcc9e2d51) & MASK32
      k1 = murmur3_32_rotl(k1, 15)
      (k1 * 0x1b873593) & MASK32
    end

    def murmur3_32_str_hash(str, seed=0)
      h1 = seed
      numbers = str.unpack('V*C*')
      tailn = str.bytesize % 4
      tail = numbers.slice!(numbers.size - tailn, tailn)
      for k1 in numbers
        h1 ^= murmur3_32__mmix(k1)
        h1 = murmur3_32_rotl(h1, 13)
        h1 = (h1*5 + 0xe6546b64) & MASK32
      end

      unless tail.empty?
        k1 = 0
        tail.reverse_each do |c1|
          k1 = (k1 << 8) | c1
        end
        h1 ^= murmur3_32__mmix(k1)
      end

      h1 ^= str.bytesize
      murmur3_32_fmix(h1)
    end

    def murmur3_32_int32_hash(i, seed=0)
      str_hash([i].pack("V"), seed)
    end

    def murmur3_32_int64_hash(i, seed=0)
      str_hash([i].pack("Q<"), seed)
    end

    class << self
      include MurmurHash3::PureRuby32
      alias fmix murmur3_32_fmix
      alias str_hash murmur3_32_str_hash
      alias int32_hash murmur3_32_int32_hash
      alias int64_hash murmur3_32_int64_hash
    end
  end

  module PureRuby128
    MASK64 = 0xffff_ffff_ffff_ffff

    def murmur3_128_rotl(x, r)
      ((x << r) | (x >> (64 - r))) & MASK64
    end

    def murmur3_128_fmix(h)
      h &= MASK64
      h ^= h >> 33
      h = (h * 0xff51afd7_ed558ccd) & MASK64
      h ^= h >> 33
      h = (h * 0xc4ceb9fe_1a85ec53) & MASK64
      h ^ (h >> 33)
    end

    C1_128 = 0x87c37b91_114253d5
    C2_128 = 0x4cf5ad43_2745937f
    def murmur3_128__mmix1(k1)
      k1 = (k1 * C1_128) & MASK64
      k1 = murmur3_128_rotl(k1, 31)
      (k1 * C2_128) & MASK64
    end

    def murmur3_128__mmix2(k2)
      k2 = (k2 * C2_128) & MASK64
      k2 = murmur3_128_rotl(k2, 33)
      (k2 * C1_128) & MASK64
    end

    def murmur3_128_str_hash(str, seed=0)
      h1 = h2 = seed
      fast_part = ((str.bytesize / 16) * 16)
      numbers = str.byteslice(0, fast_part).unpack('Q<*')
      tail = str.byteslice(fast_part, str.bytesize - fast_part).unpack('C*')

      numbers.each_slice(2) do |k1, k2|
        h1 ^= murmur3_128__mmix1(k1)
        h1 = murmur3_128_rotl(h1, 27)
        h1 = (h1 + h2) & MASK64
        h1 = (h1*5 + 0x52dce729) & MASK64
        h2 ^= murmur3_128__mmix2(k2)
        h2 = murmur3_128_rotl(h2, 31)
        h2 = (h1 + h2) & MASK64
        h2 = (h2*5 + 0x38495ab5) & MASK64
      end

      unless tail.empty?
        if tail.size > 8
          k2 = 0
          tail[8,8].reverse_each do |c2|
            k2 = (k2 << 8) | c2
          end
          h2 ^= murmur3_128__mmix2(k2)
        end
        k1 = 0
        tail[0,8].reverse_each do |c1|
          k1 = (k1 << 8) | c1
        end
        h1 ^= murmur3_128__mmix1(k1)
      end

      h1 ^= str.bytesize
      h2 ^= str.bytesize
      h1 = (h1 + h2) & MASK64
      h2 = (h1 + h2) & MASK64
      h1 = murmur3_128_fmix(h1)
      h2 = murmur3_128_fmix(h2)

      h1 = (h1 + h2) & MASK64
      h2 = (h1 + h2) & MASK64
      [h1 & 0xffffffff, h1 >> 32, h2 & 0xffffffff, h2 >> 32]
    end

    def murmur3_128_int32_hash(i, seed=0)
      str_hash([i].pack("V"), seed)
    end

    def murmur3_128_int64_hash(i, seed=0)
      str_hash([i].pack("Q<"), seed)
    end

    class << self
      include MurmurHash3::PureRuby128
      alias fmix murmur3_128_fmix
      alias str_hash murmur3_128_str_hash
      alias int32_hash murmur3_128_int32_hash
      alias int64_hash murmur3_128_int64_hash
    end
  end
end
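Because the pure-Ruby variants are ordinary modules of instance methods (the class << self block only re-exports them at module level), they can also be mixed into a class. A small sketch; the Hasher class is hypothetical, and the expected values come from the test suite:

    require 'murmurhash3/pure_ruby'

    class Hasher  # hypothetical example class
      include MurmurHash3::PureRuby32
    end

    Hasher.new.murmur3_32_str_hash('asd')           # => 0x14570c6f
    MurmurHash3::PureRuby32.int32_hash(1717859169)  # => 0x1b20e026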
data/test/test_murmur.rb
ADDED
@@ -0,0 +1,76 @@
require 'minitest/spec'
require 'minitest/autorun'

shared_examples_128 = proc do
  it 'should make correct hash for string' do
    murmur.str_hash('asdfqwer', 0).must_equal [0xd6d7d367, 0xcb41f064, 0x8973cd72, 0xc345e72e]
    murmur.str_hash('asdfqwerzxcvyui', 0).must_equal [0x007b2172f, 0x64ecae1b, 0x1813b5a5, 0x9c674ee6]
    murmur.str_hash('asdfqwerzxcvyuio', 0).must_equal [0xf508df57, 0xbb38f3fd, 0xf48c9d98, 0xb65c36cd]
    murmur.str_hash('asdfqwerzxcvyuio!', 0).must_equal [0x8a011755, 0xb13d463f, 0x8386d32a, 0x0df8884c]
  end

  it 'should make correct hash for 32bit integer' do
    murmur.int32_hash(1717859169).must_equal [0x20b48108, 0x10369ceb, 0x3ad523cc, 0xdacb587f]
    murmur.int32_hash(1717859169).must_equal murmur.str_hash('asdf')
  end

  it 'should make correct hash for 64bit integer' do
    murmur.int64_hash(0x12345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x00\x00\x00\x00")
    murmur.int64_hash(0x1234567812345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x78\x56\x34\x12")
  end

  it 'should make correct fmix for 64bit integer' do
    murmur.fmix(1717859169).must_equal 0xbefb9076a3712207
    murmur.fmix(12345678912345678).must_equal 0x197ef59146f5221c
  end
end

shared_examples_32 = proc do
  it 'should make correct hash for string' do
    murmur.str_hash('asdfqwer', 0).must_equal 0xa46b5209
    murmur.str_hash('asdfqwerty', 0).must_equal 0xa3cfe04b
    murmur.str_hash('asd', 0).must_equal 0x14570c6f
  end

  it 'should make correct hash for 32bit integer' do
    murmur.int32_hash(1717859169).must_equal 0x1b20e026
    murmur.int32_hash(1717859169).must_equal murmur.str_hash('asdf')
  end

  it 'should make correct hash for 64bit integer' do
    murmur.int64_hash(0x12345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x00\x00\x00\x00")
    murmur.int64_hash(0x1234567812345678).must_equal murmur.str_hash("\x78\x56\x34\x12\x78\x56\x34\x12")
  end

  it 'should make correct fmix for 32bit integer' do
    murmur.fmix(1717859169).must_equal 0x17561734
  end
end

require 'murmurhash3/pure_ruby'
describe "Pure ruby 32" do
  let(:murmur) { MurmurHash3::PureRuby32 }
  class_exec &shared_examples_32
end

describe "Pure ruby 128" do
  let(:murmur) { MurmurHash3::PureRuby128 }
  class_exec &shared_examples_128
end

begin
  require 'murmurhash3/native_murmur'

  describe "Native 32" do
    let(:murmur) { MurmurHash3::Native32 }
    class_exec &shared_examples_32
  end

  describe "Native 128" do
    let(:murmur) { MurmurHash3::Native128 }
    class_exec &shared_examples_128
  end

rescue LoadError => e
  puts "Could not load native extension: #{e}"
end
metadata
ADDED
@@ -0,0 +1,54 @@
--- !ruby/object:Gem::Specification
name: murmurhash3
version: !ruby/object:Gem::Version
  version: 0.1.1
prerelease:
platform: ruby
authors:
- Sokolov Yura 'funny-falcon'
autorequire:
bindir: bin
cert_chain: []
date: 2012-08-02 00:00:00.000000000 Z
dependencies: []
description: implementation of murmur3 hashing function
email:
- funny.falcon@gmail.com
executables: []
extensions:
- ext/murmurhash3/extconf.rb
extra_rdoc_files: []
files:
- ext/murmurhash3/extconf.rb
- ext/murmurhash3/murmur3.c
- lib/murmurhash3/pure_ruby.rb
- lib/murmurhash3/version.rb
- lib/murmurhash3.rb
- test/test_murmur.rb
homepage: https://github.com/funny-falcon/murmurhash3
licenses: []
post_install_message:
rdoc_options: []
require_paths:
- lib
- ext
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project:
rubygems_version: 1.8.24
signing_key:
specification_version: 3
summary: implements mumur3 hashing function
test_files:
- test/test_murmur.rb