murmurhash3 0.1.3.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/murmurhash3/murmur3.c +203 -13
- data/lib/murmurhash3/pure_ruby.rb +30 -0
- data/lib/murmurhash3/version.rb +1 -1
- metadata +5 -5
data/ext/murmurhash3/murmur3.c
CHANGED
@@ -109,6 +109,26 @@ getblock64(const uint64_t * p, int i)
|
|
109
109
|
#define getblock64(p, i) (p[i])
|
110
110
|
#endif
|
111
111
|
|
112
|
+
static const char hex[] =
|
113
|
+
"000102030405060708090a0b0c0d0e0f" \
|
114
|
+
"101112131415161718191a1b1c1d1e1f" \
|
115
|
+
"202122232425262728292a2b2c2d2e2f" \
|
116
|
+
"303132333435363738393a3b3c3d3e3f" \
|
117
|
+
"404142434445464748494a4b4c4d4e4f" \
|
118
|
+
"505152535455565758595a5b5c5d5e5f" \
|
119
|
+
"606162636465666768696a6b6c6d6e6f" \
|
120
|
+
"707172737475767778797a7b7c7d7e7f" \
|
121
|
+
"808182838485868788898a8b8c8d8e8f" \
|
122
|
+
"909192939495969798999a9b9c9d9e9f" \
|
123
|
+
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" \
|
124
|
+
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" \
|
125
|
+
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" \
|
126
|
+
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" \
|
127
|
+
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef" \
|
128
|
+
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
129
|
+
static const char base64[] =
|
130
|
+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
131
|
+
|
112
132
|
/* Finalization mix - force all bits of a hash block to avalanche */
|
113
133
|
|
114
134
|
static inline FORCE_INLINE uint32_t
|
@@ -161,7 +181,7 @@ MurmurHash3_x86_32 ( const void * key, long len, uint32_t seed)
|
|
161
181
|
for(i = -nblocks; i; i++)
|
162
182
|
{
|
163
183
|
h1 ^= mmix32(getblock32(blocks, i));
|
164
|
-
h1 = ROTL32(h1,13);
|
184
|
+
h1 = ROTL32(h1,13);
|
165
185
|
h1 = h1*5+0xe6546b64;
|
166
186
|
}
|
167
187
|
|
@@ -184,7 +204,7 @@ MurmurHash3_x86_32 ( const void * key, long len, uint32_t seed)
|
|
184
204
|
h1 = fmix32(h1);
|
185
205
|
|
186
206
|
return h1;
|
187
|
-
}
|
207
|
+
}
|
188
208
|
|
189
209
|
#define C1_128 BIG_CONSTANT(0x87c37b91114253d5)
|
190
210
|
#define C2_128 BIG_CONSTANT(0x4cf5ad432745937f)
|
@@ -297,11 +317,10 @@ rb_fmix64(VALUE self, VALUE integer)
|
|
297
317
|
#endif
|
298
318
|
}
|
299
319
|
|
300
|
-
static VALUE
|
301
|
-
rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
|
320
|
+
static uint32_t
|
321
|
+
rb_murmur3_32_hash(int argc, VALUE* argv, VALUE self)
|
302
322
|
{
|
303
323
|
VALUE rstr;
|
304
|
-
uint32_t result;
|
305
324
|
|
306
325
|
if (argc == 0 || argc > 2) {
|
307
326
|
rb_raise(rb_eArgError, "accept 1 or 2 arguments: (string[, seed])");
|
@@ -309,15 +328,90 @@ rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
|
|
309
328
|
rstr = argv[0];
|
310
329
|
StringValue(rstr);
|
311
330
|
|
312
|
-
|
331
|
+
return MurmurHash3_x86_32(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]));
|
332
|
+
}
|
313
333
|
|
314
|
-
|
334
|
+
static VALUE
|
335
|
+
rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
|
336
|
+
{
|
337
|
+
return UINT2NUM(rb_murmur3_32_hash(argc, argv, self));
|
338
|
+
}
|
339
|
+
|
340
|
+
#define SWAP_32_INT(t) do { \
|
341
|
+
(t) = ((t) >> 24) | (((t) >> 8) & 0xff00) | \
|
342
|
+
(((t) & 0xff00) << 8) | ((t) << 24); \
|
343
|
+
} while (0)
|
344
|
+
static VALUE
|
345
|
+
rb_murmur3_32_str_digest(int argc, VALUE* argv, VALUE self)
|
346
|
+
{
|
347
|
+
union {
|
348
|
+
uint32_t result;
|
349
|
+
char res[4];
|
350
|
+
} r;
|
351
|
+
|
352
|
+
r.result = rb_murmur3_32_hash(argc, argv, self);
|
353
|
+
#if WORDS_BIGENDIAN
|
354
|
+
SWAP_32_INT(r.result);
|
355
|
+
#endif
|
356
|
+
|
357
|
+
return rb_str_new(r.res, 4);
|
358
|
+
}
|
359
|
+
|
360
|
+
static VALUE
|
361
|
+
rb_murmur3_32_str_hexdigest(int argc, VALUE* argv, VALUE self)
|
362
|
+
{
|
363
|
+
union {
|
364
|
+
uint32_t result;
|
365
|
+
unsigned char res[4];
|
366
|
+
} r;
|
367
|
+
char out[8];
|
368
|
+
int i;
|
369
|
+
|
370
|
+
r.result = rb_murmur3_32_hash(argc, argv, self);
|
371
|
+
#if WORDS_BIGENDIAN
|
372
|
+
SWAP_32_INT(r.result);
|
373
|
+
#endif
|
374
|
+
for(i = 0; i<4; i++) {
|
375
|
+
out[i*2] = hex[r.res[i]*2];
|
376
|
+
out[i*2+1] = hex[r.res[i]*2+1];
|
377
|
+
}
|
378
|
+
|
379
|
+
return rb_str_new(out, 8);
|
380
|
+
}
|
381
|
+
|
382
|
+
static VALUE
|
383
|
+
rb_murmur3_32_str_base64digest(int argc, VALUE *argv, VALUE self)
|
384
|
+
{
|
385
|
+
union {
|
386
|
+
uint32_t result;
|
387
|
+
unsigned char res[6];
|
388
|
+
} r;
|
389
|
+
char out[8];
|
390
|
+
int i;
|
391
|
+
r.result = rb_murmur3_32_hash(argc, argv, self);
|
392
|
+
#if WORDS_BIGENDIAN
|
393
|
+
SWAP_32_INT(r.result);
|
394
|
+
#endif
|
395
|
+
r.res[4] = 0;
|
396
|
+
r.res[5] = 0;
|
397
|
+
for(i = 0; i<2; i++) {
|
398
|
+
uint32_t b64 =
|
399
|
+
((uint32_t)r.res[i*3+0] << 16) |
|
400
|
+
((uint32_t)r.res[i*3+1] << 8) |
|
401
|
+
(uint32_t)r.res[i*3+2];
|
402
|
+
out[i*4+0] = base64[(b64 >> 18) & 0x3f];
|
403
|
+
out[i*4+1] = base64[(b64 >> 12) & 0x3f];
|
404
|
+
out[i*4+2] = base64[(b64 >> 6) & 0x3f];
|
405
|
+
out[i*4+3] = base64[(b64 >> 0) & 0x3f];
|
406
|
+
}
|
407
|
+
out[6] = '=';
|
408
|
+
out[7] = '=';
|
409
|
+
return rb_str_new(out, sizeof(out));
|
315
410
|
}
|
316
411
|
|
317
412
|
static VALUE
|
318
413
|
rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
|
319
414
|
{
|
320
|
-
VALUE rint;
|
321
415
|
uint32_t _int;
|
322
416
|
uint32_t result;
|
323
417
|
|
@@ -334,7 +428,6 @@ rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
|
|
334
428
|
static VALUE
|
335
429
|
rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
|
336
430
|
{
|
337
|
-
VALUE rint;
|
338
431
|
uint64_t _int;
|
339
432
|
uint32_t result;
|
340
433
|
|
@@ -366,6 +459,20 @@ rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
|
|
366
459
|
result[3] = tmp; \
|
367
460
|
} while (0)
|
368
461
|
|
462
|
+
#define SWAP_128_BIT_BYTE() do { \
|
463
|
+
uint32_t tmp; \
|
464
|
+
tmp = r.result[0]; \
|
465
|
+
SWAP_32_INT(tmp); \
|
466
|
+
SWAP_32_INT(r.result[1]); \
|
467
|
+
r.result[0] = r.result[1]; \
|
468
|
+
r.result[1] = tmp; \
|
469
|
+
tmp = r.result[2]; \
|
470
|
+
SWAP_32_INT(tmp); \
|
471
|
+
SWAP_32_INT(r.result[3]); \
|
472
|
+
r.result[2] = r.result[3]; \
|
473
|
+
r.result[3] = tmp; \
|
474
|
+
} while (0)
|
475
|
+
|
369
476
|
#define RETURN_128_BIT() \
|
370
477
|
ar_result = rb_ary_new2(4); \
|
371
478
|
rb_ary_push(ar_result, UINT2NUM(result[0])); \
|
@@ -374,11 +481,10 @@ rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
|
|
374
481
|
rb_ary_push(ar_result, UINT2NUM(result[3])); \
|
375
482
|
return ar_result
|
376
483
|
|
377
|
-
static VALUE
|
378
|
-
rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
|
484
|
+
static void
|
485
|
+
rb_murmur3_128_hash(int argc, VALUE* argv, VALUE self, uint32_t result[4])
|
379
486
|
{
|
380
|
-
VALUE rstr, ar_result;
|
381
|
-
uint32_t result[4];
|
487
|
+
VALUE rstr;
|
382
488
|
|
383
489
|
if (argc == 0 || argc > 2) {
|
384
490
|
rb_raise(rb_eArgError, "accept 1 or 2 arguments: (string[, seed])");
|
@@ -387,12 +493,84 @@ rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
|
|
387
493
|
StringValue(rstr);
|
388
494
|
|
389
495
|
MurmurHash3_x64_128(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]), result);
|
496
|
+
}
|
497
|
+
|
498
|
+
static VALUE
|
499
|
+
rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
|
500
|
+
{
|
501
|
+
VALUE ar_result;
|
502
|
+
uint32_t result[4];
|
503
|
+
rb_murmur3_128_hash(argc, argv, self, result);
|
390
504
|
#if WORDS_BIGENDIAN
|
391
505
|
SWAP_128_BIT();
|
392
506
|
#endif
|
393
507
|
RETURN_128_BIT();
|
394
508
|
}
|
395
509
|
|
510
|
+
static VALUE
|
511
|
+
rb_murmur3_128_str_digest(int argc, VALUE *argv, VALUE self)
|
512
|
+
{
|
513
|
+
union {
|
514
|
+
uint32_t result[4];
|
515
|
+
char res[16];
|
516
|
+
} r;
|
517
|
+
rb_murmur3_128_hash(argc, argv, self, r.result);
|
518
|
+
#if WORDS_BIGENDIAN
|
519
|
+
SWAP_128_BIT_BYTE();
|
520
|
+
#endif
|
521
|
+
return rb_str_new(r.res, sizeof(r.res));
|
522
|
+
}
|
523
|
+
|
524
|
+
static VALUE
|
525
|
+
rb_murmur3_128_str_hexdigest(int argc, VALUE *argv, VALUE self)
|
526
|
+
{
|
527
|
+
union {
|
528
|
+
uint32_t result[4];
|
529
|
+
unsigned char res[16];
|
530
|
+
} r;
|
531
|
+
char out[32];
|
532
|
+
int i;
|
533
|
+
rb_murmur3_128_hash(argc, argv, self, r.result);
|
534
|
+
#if WORDS_BIGENDIAN
|
535
|
+
SWAP_128_BIT_BYTE();
|
536
|
+
#endif
|
537
|
+
for(i = 0; i<16; i++) {
|
538
|
+
out[i*2] = hex[r.res[i]*2];
|
539
|
+
out[i*2+1] = hex[r.res[i]*2+1];
|
540
|
+
}
|
541
|
+
return rb_str_new(out, sizeof(out));
|
542
|
+
}
|
543
|
+
|
544
|
+
static VALUE
|
545
|
+
rb_murmur3_128_str_base64digest(int argc, VALUE *argv, VALUE self)
|
546
|
+
{
|
547
|
+
union {
|
548
|
+
uint32_t result[4];
|
549
|
+
unsigned char res[18];
|
550
|
+
} r;
|
551
|
+
char out[24];
|
552
|
+
int i;
|
553
|
+
rb_murmur3_128_hash(argc, argv, self, r.result);
|
554
|
+
#if WORDS_BIGENDIAN
|
555
|
+
SWAP_128_BIT_BYTE();
|
556
|
+
#endif
|
557
|
+
r.res[16] = 0;
|
558
|
+
r.res[17] = 0;
|
559
|
+
for(i = 0; i<6; i++) {
|
560
|
+
uint32_t b64 =
|
561
|
+
((uint32_t)r.res[i*3+0] << 16) |
|
562
|
+
((uint32_t)r.res[i*3+1] << 8) |
|
563
|
+
(uint32_t)r.res[i*3+2];
|
564
|
+
out[i*4+0] = base64[(b64 >> 18) & 0x3f];
|
565
|
+
out[i*4+1] = base64[(b64 >> 12) & 0x3f];
|
566
|
+
out[i*4+2] = base64[(b64 >> 6) & 0x3f];
|
567
|
+
out[i*4+3] = base64[(b64 >> 0) & 0x3f];
|
568
|
+
}
|
569
|
+
out[22] = '=';
|
570
|
+
out[23] = '=';
|
571
|
+
return rb_str_new(out, sizeof(out));
|
572
|
+
}
|
573
|
+
|
396
574
|
static VALUE
|
397
575
|
rb_murmur3_128_int32_hash(int argc, VALUE* argv, VALUE self)
|
398
576
|
{
|
@@ -441,6 +619,9 @@ Init_native_murmur() {
|
|
441
619
|
|
442
620
|
rb_define_method(mod_murmur32, "murmur3_32_fmix", rb_fmix32, 1);
|
443
621
|
rb_define_method(mod_murmur32, "murmur3_32_str_hash", rb_murmur3_32_str_hash, -1);
|
622
|
+
rb_define_method(mod_murmur32, "murmur3_32_str_digest", rb_murmur3_32_str_digest, -1);
|
623
|
+
rb_define_method(mod_murmur32, "murmur3_32_str_hexdigest", rb_murmur3_32_str_hexdigest, -1);
|
624
|
+
rb_define_method(mod_murmur32, "murmur3_32_str_base64digest", rb_murmur3_32_str_base64digest, -1);
|
444
625
|
rb_define_method(mod_murmur32, "murmur3_32_int32_hash", rb_murmur3_32_int32_hash, -1);
|
445
626
|
rb_define_method(mod_murmur32, "murmur3_32_int64_hash", rb_murmur3_32_int64_hash, -1);
|
446
627
|
|
@@ -448,12 +629,18 @@ Init_native_murmur() {
|
|
448
629
|
singleton = rb_singleton_class(mod_murmur32);
|
449
630
|
rb_define_alias(singleton, "fmix", "murmur3_32_fmix");
|
450
631
|
rb_define_alias(singleton, "str_hash", "murmur3_32_str_hash");
|
632
|
+
rb_define_alias(singleton, "str_digest", "murmur3_32_str_digest");
|
633
|
+
rb_define_alias(singleton, "str_hexdigest", "murmur3_32_str_hexdigest");
|
634
|
+
rb_define_alias(singleton, "str_base64digest", "murmur3_32_str_base64digest");
|
451
635
|
rb_define_alias(singleton, "int32_hash", "murmur3_32_int32_hash");
|
452
636
|
rb_define_alias(singleton, "int64_hash", "murmur3_32_int64_hash");
|
453
637
|
|
454
638
|
|
455
639
|
rb_define_method(mod_murmur128, "murmur3_128_fmix", rb_fmix64, 1);
|
456
640
|
rb_define_method(mod_murmur128, "murmur3_128_str_hash", rb_murmur3_128_str_hash, -1);
|
641
|
+
rb_define_method(mod_murmur128, "murmur3_128_str_digest", rb_murmur3_128_str_digest, -1);
|
642
|
+
rb_define_method(mod_murmur128, "murmur3_128_str_hexdigest", rb_murmur3_128_str_hexdigest, -1);
|
643
|
+
rb_define_method(mod_murmur128, "murmur3_128_str_base64digest", rb_murmur3_128_str_base64digest, -1);
|
457
644
|
rb_define_method(mod_murmur128, "murmur3_128_int32_hash", rb_murmur3_128_int32_hash, -1);
|
458
645
|
rb_define_method(mod_murmur128, "murmur3_128_int64_hash", rb_murmur3_128_int64_hash, -1);
|
459
646
|
|
@@ -461,6 +648,9 @@ Init_native_murmur() {
|
|
461
648
|
singleton = rb_singleton_class(mod_murmur128);
|
462
649
|
rb_define_alias(singleton, "fmix", "murmur3_128_fmix");
|
463
650
|
rb_define_alias(singleton, "str_hash", "murmur3_128_str_hash");
|
651
|
+
rb_define_alias(singleton, "str_digest", "murmur3_128_str_digest");
|
652
|
+
rb_define_alias(singleton, "str_hexdigest", "murmur3_128_str_hexdigest");
|
653
|
+
rb_define_alias(singleton, "str_base64digest", "murmur3_128_str_base64digest");
|
464
654
|
rb_define_alias(singleton, "int32_hash", "murmur3_128_int32_hash");
|
465
655
|
rb_define_alias(singleton, "int64_hash", "murmur3_128_int64_hash");
|
466
656
|
|
data/lib/murmurhash3/pure_ruby.rb
CHANGED
@@ -53,10 +53,25 @@ module MurmurHash3
|
|
53
53
|
str_hash([i].pack("Q<"), seed)
|
54
54
|
end
|
55
55
|
|
56
|
+
def murmur3_32_str_digest(str, seed=0)
|
57
|
+
[str_hash(str, seed)].pack("V")
|
58
|
+
end
|
59
|
+
|
60
|
+
def murmur3_32_str_hexdigest(str, seed=0)
|
61
|
+
[str_hash(str, seed)].pack("V").unpack("H*")[0]
|
62
|
+
end
|
63
|
+
|
64
|
+
def murmur3_32_str_base64digest(str, seed=0)
|
65
|
+
[[str_hash(str, seed)].pack("V")].pack("m").chomp!
|
66
|
+
end
|
67
|
+
|
56
68
|
class << self
|
57
69
|
include MurmurHash3::PureRuby32
|
58
70
|
alias fmix murmur3_32_fmix
|
59
71
|
alias str_hash murmur3_32_str_hash
|
72
|
+
alias str_digest murmur3_32_str_digest
|
73
|
+
alias str_hexdigest murmur3_32_str_hexdigest
|
74
|
+
alias str_base64digest murmur3_32_str_base64digest
|
60
75
|
alias int32_hash murmur3_32_int32_hash
|
61
76
|
alias int64_hash murmur3_32_int64_hash
|
62
77
|
end
|
@@ -144,10 +159,25 @@ module MurmurHash3
|
|
144
159
|
str_hash([i].pack("Q<"), seed)
|
145
160
|
end
|
146
161
|
|
162
|
+
def murmur3_128_str_digest(str, seed=0)
|
163
|
+
str_hash(str, seed).pack("V4")
|
164
|
+
end
|
165
|
+
|
166
|
+
def murmur3_128_str_hexdigest(str, seed=0)
|
167
|
+
str_hash(str, seed).pack("V4").unpack("H*")[0]
|
168
|
+
end
|
169
|
+
|
170
|
+
def murmur3_128_str_base64digest(str, seed=0)
|
171
|
+
[str_hash(str, seed).pack("V4")].pack('m').chomp!
|
172
|
+
end
|
173
|
+
|
147
174
|
class << self
|
148
175
|
include MurmurHash3::PureRuby128
|
149
176
|
alias fmix murmur3_128_fmix
|
150
177
|
alias str_hash murmur3_128_str_hash
|
178
|
+
alias str_digest murmur3_128_str_digest
|
179
|
+
alias str_hexdigest murmur3_128_str_hexdigest
|
180
|
+
alias str_base64digest murmur3_128_str_base64digest
|
151
181
|
alias int32_hash murmur3_128_int32_hash
|
152
182
|
alias int64_hash murmur3_128_int64_hash
|
153
183
|
end
|
data/lib/murmurhash3/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: murmurhash3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3.1
|
5
4
|
prerelease:
|
5
|
+
version: 0.1.4
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Sokolov Yura 'funny-falcon'
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-15 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: implementation of murmur3 hashing function
|
15
15
|
email:
|
@@ -34,20 +34,20 @@ require_paths:
|
|
34
34
|
- lib
|
35
35
|
- ext
|
36
36
|
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
37
38
|
requirements:
|
38
39
|
- - ! '>='
|
39
40
|
- !ruby/object:Gem::Version
|
40
41
|
version: '0'
|
41
|
-
none: false
|
42
42
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
43
44
|
requirements:
|
44
45
|
- - ! '>='
|
45
46
|
- !ruby/object:Gem::Version
|
46
47
|
version: '0'
|
47
|
-
none: false
|
48
48
|
requirements: []
|
49
49
|
rubyforge_project:
|
50
|
-
rubygems_version: 1.8.
|
50
|
+
rubygems_version: 1.8.23
|
51
51
|
signing_key:
|
52
52
|
specification_version: 3
|
53
53
|
summary: implements mumur3 hashing function
|