murmurhash3 0.1.3.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/murmurhash3/murmur3.c +203 -13
- data/lib/murmurhash3/pure_ruby.rb +30 -0
- data/lib/murmurhash3/version.rb +1 -1
- metadata +5 -5
data/ext/murmurhash3/murmur3.c
CHANGED
@@ -109,6 +109,26 @@ getblock64(const uint64_t * p, int i)
|
|
109
109
|
#define getblock64(p, i) (p[i])
|
110
110
|
#endif
|
111
111
|
|
112
|
+
static const char hex[] =
|
113
|
+
"000102030405060708090a0b0c0d0e0f" \
|
114
|
+
"101112131415161718191a1b1c1d1e1f" \
|
115
|
+
"202122232425262728292a2b2c2d2e2f" \
|
116
|
+
"303132333435363738393a3b3c3d3e3f" \
|
117
|
+
"404142434445464748494a4b4c4d4e4f" \
|
118
|
+
"505152535455565758595a5b5c5d5e5f" \
|
119
|
+
"606162636465666768696a6b6c6d6e6f" \
|
120
|
+
"707172737475767778797a7b7c7d7e7f" \
|
121
|
+
"808182838485868788898a8b8c8d8e8f" \
|
122
|
+
"909192939495969798999a9b9c9d9e9f" \
|
123
|
+
"a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" \
|
124
|
+
"b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" \
|
125
|
+
"c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" \
|
126
|
+
"d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" \
|
127
|
+
"e0e1e2e3e4e5e6e7e8e9eaebecedeeef" \
|
128
|
+
"f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
|
129
|
+
static const char base64[] =
|
130
|
+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
131
|
+
|
112
132
|
/* Finalization mix - force all bits of a hash block to avalanche */
|
113
133
|
|
114
134
|
static inline FORCE_INLINE uint32_t
|
@@ -161,7 +181,7 @@ MurmurHash3_x86_32 ( const void * key, long len, uint32_t seed)
|
|
161
181
|
for(i = -nblocks; i; i++)
|
162
182
|
{
|
163
183
|
h1 ^= mmix32(getblock32(blocks, i));
|
164
|
-
h1 = ROTL32(h1,13);
|
184
|
+
h1 = ROTL32(h1,13);
|
165
185
|
h1 = h1*5+0xe6546b64;
|
166
186
|
}
|
167
187
|
|
@@ -184,7 +204,7 @@ MurmurHash3_x86_32 ( const void * key, long len, uint32_t seed)
|
|
184
204
|
h1 = fmix32(h1);
|
185
205
|
|
186
206
|
return h1;
|
187
|
-
}
|
207
|
+
}
|
188
208
|
|
189
209
|
#define C1_128 BIG_CONSTANT(0x87c37b91114253d5)
|
190
210
|
#define C2_128 BIG_CONSTANT(0x4cf5ad432745937f)
|
@@ -297,11 +317,10 @@ rb_fmix64(VALUE self, VALUE integer)
|
|
297
317
|
#endif
|
298
318
|
}
|
299
319
|
|
300
|
-
static VALUE
|
301
|
-
rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
320
|
+
static uint32_t
|
321
|
+
rb_murmur3_32_hash(int argc, VALUE* argv, VALUE self)
|
302
322
|
{
|
303
323
|
VALUE rstr;
|
304
|
-
uint32_t result;
|
305
324
|
|
306
325
|
if (argc == 0 || argc > 2) {
|
307
326
|
rb_raise(rb_eArgError, "accept 1 or 2 arguments: (string[, seed])");
|
@@ -309,15 +328,90 @@ rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
|
|
309
328
|
rstr = argv[0];
|
310
329
|
StringValue(rstr);
|
311
330
|
|
312
|
-
|
331
|
+
return MurmurHash3_x86_32(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]));
|
332
|
+
}
|
313
333
|
|
314
|
-
|
334
|
+
static VALUE
|
335
|
+
rb_murmur3_32_str_hash(int argc, VALUE* argv, VALUE self)
|
336
|
+
{
|
337
|
+
return UINT2NUM(rb_murmur3_32_hash(argc, argv, self));
|
338
|
+
}
|
339
|
+
|
340
|
+
#define SWAP_32_INT(t) do { \
|
341
|
+
(t) = ((t) >> 24) | (((t) >> 8) & 0xff00) | \
|
342
|
+
(((t) & 0xff00) << 8) | ((t) << 24); \
|
343
|
+
} while (0)
|
344
|
+
static VALUE
|
345
|
+
rb_murmur3_32_str_digest(int argc, VALUE* argv, VALUE self)
|
346
|
+
{
|
347
|
+
union {
|
348
|
+
uint32_t result;
|
349
|
+
char res[4];
|
350
|
+
} r;
|
351
|
+
|
352
|
+
r.result = rb_murmur3_32_hash(argc, argv, self);
|
353
|
+
#if WORDS_BIGENDIAN
|
354
|
+
SWAP_32_INT(r.result);
|
355
|
+
#endif
|
356
|
+
|
357
|
+
return rb_str_new(r.res, 4);
|
358
|
+
}
|
359
|
+
|
360
|
+
static VALUE
|
361
|
+
rb_murmur3_32_str_hexdigest(int argc, VALUE* argv, VALUE self)
|
362
|
+
{
|
363
|
+
union {
|
364
|
+
uint32_t result;
|
365
|
+
unsigned char res[4];
|
366
|
+
} r;
|
367
|
+
char out[8];
|
368
|
+
int i;
|
369
|
+
|
370
|
+
r.result = rb_murmur3_32_hash(argc, argv, self);
|
371
|
+
#if WORDS_BIGENDIAN
|
372
|
+
SWAP_32_INT(r.result);
|
373
|
+
#endif
|
374
|
+
for(i = 0; i<4; i++) {
|
375
|
+
out[i*2] = hex[r.res[i]*2];
|
376
|
+
out[i*2+1] = hex[r.res[i]*2+1];
|
377
|
+
}
|
378
|
+
|
379
|
+
return rb_str_new(out, 8);
|
380
|
+
}
|
381
|
+
|
382
|
+
static VALUE
|
383
|
+
rb_murmur3_32_str_base64digest(int argc, VALUE *argv, VALUE self)
|
384
|
+
{
|
385
|
+
union {
|
386
|
+
uint32_t result;
|
387
|
+
unsigned char res[6];
|
388
|
+
} r;
|
389
|
+
char out[8];
|
390
|
+
int i;
|
391
|
+
r.result = rb_murmur3_32_hash(argc, argv, self);
|
392
|
+
#if WORDS_BIGENDIAN
|
393
|
+
SWAP_32_INT(r.result);
|
394
|
+
#endif
|
395
|
+
r.res[4] = 0;
|
396
|
+
r.res[5] = 0;
|
397
|
+
for(i = 0; i<2; i++) {
|
398
|
+
uint32_t b64 =
|
399
|
+
((uint32_t)r.res[i*3+0] << 16) |
|
400
|
+
((uint32_t)r.res[i*3+1] << 8) |
|
401
|
+
(uint32_t)r.res[i*3+2];
|
402
|
+
out[i*4+0] = base64[(b64 >> 18) & 0x3f];
|
403
|
+
out[i*4+1] = base64[(b64 >> 12) & 0x3f];
|
404
|
+
out[i*4+2] = base64[(b64 >> 6) & 0x3f];
|
405
|
+
out[i*4+3] = base64[(b64 >> 0) & 0x3f];
|
406
|
+
}
|
407
|
+
out[6] = '=';
|
408
|
+
out[7] = '=';
|
409
|
+
return rb_str_new(out, sizeof(out));
|
315
410
|
}
|
316
411
|
|
317
412
|
static VALUE
|
318
413
|
rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
|
319
414
|
{
|
320
|
-
VALUE rint;
|
321
415
|
uint32_t _int;
|
322
416
|
uint32_t result;
|
323
417
|
|
@@ -334,7 +428,6 @@ rb_murmur3_32_int32_hash(int argc, VALUE* argv, VALUE self)
|
|
334
428
|
static VALUE
|
335
429
|
rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
|
336
430
|
{
|
337
|
-
VALUE rint;
|
338
431
|
uint64_t _int;
|
339
432
|
uint32_t result;
|
340
433
|
|
@@ -366,6 +459,20 @@ rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
|
|
366
459
|
result[3] = tmp; \
|
367
460
|
} while (0)
|
368
461
|
|
462
|
+
#define SWAP_128_BIT_BYTE() do { \
|
463
|
+
uint32_t tmp; \
|
464
|
+
tmp = r.result[0]; \
|
465
|
+
SWAP_32_INT(tmp); \
|
466
|
+
SWAP_32_INT(r.result[1]); \
|
467
|
+
r.result[0] = r.result[1]; \
|
468
|
+
r.result[1] = tmp; \
|
469
|
+
tmp = r.result[2]; \
|
470
|
+
SWAP_32_INT(tmp); \
|
471
|
+
SWAP_32_INT(r.result[3]); \
|
472
|
+
r.result[2] = r.result[3]; \
|
473
|
+
r.result[3] = tmp; \
|
474
|
+
} while (0)
|
475
|
+
|
369
476
|
#define RETURN_128_BIT() \
|
370
477
|
ar_result = rb_ary_new2(4); \
|
371
478
|
rb_ary_push(ar_result, UINT2NUM(result[0])); \
|
@@ -374,11 +481,10 @@ rb_murmur3_32_int64_hash(int argc, VALUE* argv, VALUE self)
|
|
374
481
|
rb_ary_push(ar_result, UINT2NUM(result[3])); \
|
375
482
|
return ar_result
|
376
483
|
|
377
|
-
static VALUE
|
378
|
-
rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
484
|
+
static void
|
485
|
+
rb_murmur3_128_hash(int argc, VALUE* argv, VALUE self, uint32_t result[4])
|
379
486
|
{
|
380
|
-
VALUE rstr, ar_result;
|
381
|
-
uint32_t result[4];
|
487
|
+
VALUE rstr;
|
382
488
|
|
383
489
|
if (argc == 0 || argc > 2) {
|
384
490
|
rb_raise(rb_eArgError, "accept 1 or 2 arguments: (string[, seed])");
|
@@ -387,12 +493,84 @@ rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
|
|
387
493
|
StringValue(rstr);
|
388
494
|
|
389
495
|
MurmurHash3_x64_128(RSTRING_PTR(rstr), RSTRING_LEN(rstr), argc == 1 ? 0 : NUM2UINT(argv[1]), result);
|
496
|
+
}
|
497
|
+
|
498
|
+
static VALUE
|
499
|
+
rb_murmur3_128_str_hash(int argc, VALUE* argv, VALUE self)
|
500
|
+
{
|
501
|
+
VALUE ar_result;
|
502
|
+
uint32_t result[4];
|
503
|
+
rb_murmur3_128_hash(argc, argv, self, result);
|
390
504
|
#if WORDS_BIGENDIAN
|
391
505
|
SWAP_128_BIT();
|
392
506
|
#endif
|
393
507
|
RETURN_128_BIT();
|
394
508
|
}
|
395
509
|
|
510
|
+
static VALUE
|
511
|
+
rb_murmur3_128_str_digest(int argc, VALUE *argv, VALUE self)
|
512
|
+
{
|
513
|
+
union {
|
514
|
+
uint32_t result[4];
|
515
|
+
char res[16];
|
516
|
+
} r;
|
517
|
+
rb_murmur3_128_hash(argc, argv, self, r.result);
|
518
|
+
#if WORDS_BIGENDIAN
|
519
|
+
SWAP_128_BIT_BYTE();
|
520
|
+
#endif
|
521
|
+
return rb_str_new(r.res, sizeof(r.res));
|
522
|
+
}
|
523
|
+
|
524
|
+
static VALUE
|
525
|
+
rb_murmur3_128_str_hexdigest(int argc, VALUE *argv, VALUE self)
|
526
|
+
{
|
527
|
+
union {
|
528
|
+
uint32_t result[4];
|
529
|
+
unsigned char res[16];
|
530
|
+
} r;
|
531
|
+
char out[32];
|
532
|
+
int i;
|
533
|
+
rb_murmur3_128_hash(argc, argv, self, r.result);
|
534
|
+
#if WORDS_BIGENDIAN
|
535
|
+
SWAP_128_BIT_BYTE();
|
536
|
+
#endif
|
537
|
+
for(i = 0; i<16; i++) {
|
538
|
+
out[i*2] = hex[r.res[i]*2];
|
539
|
+
out[i*2+1] = hex[r.res[i]*2+1];
|
540
|
+
}
|
541
|
+
return rb_str_new(out, sizeof(out));
|
542
|
+
}
|
543
|
+
|
544
|
+
static VALUE
|
545
|
+
rb_murmur3_128_str_base64digest(int argc, VALUE *argv, VALUE self)
|
546
|
+
{
|
547
|
+
union {
|
548
|
+
uint32_t result[4];
|
549
|
+
unsigned char res[18];
|
550
|
+
} r;
|
551
|
+
char out[24];
|
552
|
+
int i;
|
553
|
+
rb_murmur3_128_hash(argc, argv, self, r.result);
|
554
|
+
#if WORDS_BIGENDIAN
|
555
|
+
SWAP_128_BIT_BYTE();
|
556
|
+
#endif
|
557
|
+
r.res[16] = 0;
|
558
|
+
r.res[17] = 0;
|
559
|
+
for(i = 0; i<6; i++) {
|
560
|
+
uint32_t b64 =
|
561
|
+
((uint32_t)r.res[i*3+0] << 16) |
|
562
|
+
((uint32_t)r.res[i*3+1] << 8) |
|
563
|
+
(uint32_t)r.res[i*3+2];
|
564
|
+
out[i*4+0] = base64[(b64 >> 18) & 0x3f];
|
565
|
+
out[i*4+1] = base64[(b64 >> 12) & 0x3f];
|
566
|
+
out[i*4+2] = base64[(b64 >> 6) & 0x3f];
|
567
|
+
out[i*4+3] = base64[(b64 >> 0) & 0x3f];
|
568
|
+
}
|
569
|
+
out[22] = '=';
|
570
|
+
out[23] = '=';
|
571
|
+
return rb_str_new(out, sizeof(out));
|
572
|
+
}
|
573
|
+
|
396
574
|
static VALUE
|
397
575
|
rb_murmur3_128_int32_hash(int argc, VALUE* argv, VALUE self)
|
398
576
|
{
|
@@ -441,6 +619,9 @@ Init_native_murmur() {
|
|
441
619
|
|
442
620
|
rb_define_method(mod_murmur32, "murmur3_32_fmix", rb_fmix32, 1);
|
443
621
|
rb_define_method(mod_murmur32, "murmur3_32_str_hash", rb_murmur3_32_str_hash, -1);
|
622
|
+
rb_define_method(mod_murmur32, "murmur3_32_str_digest", rb_murmur3_32_str_digest, -1);
|
623
|
+
rb_define_method(mod_murmur32, "murmur3_32_str_hexdigest", rb_murmur3_32_str_hexdigest, -1);
|
624
|
+
rb_define_method(mod_murmur32, "murmur3_32_str_base64digest", rb_murmur3_32_str_base64digest, -1);
|
444
625
|
rb_define_method(mod_murmur32, "murmur3_32_int32_hash", rb_murmur3_32_int32_hash, -1);
|
445
626
|
rb_define_method(mod_murmur32, "murmur3_32_int64_hash", rb_murmur3_32_int64_hash, -1);
|
446
627
|
|
@@ -448,12 +629,18 @@ Init_native_murmur() {
|
|
448
629
|
singleton = rb_singleton_class(mod_murmur32);
|
449
630
|
rb_define_alias(singleton, "fmix", "murmur3_32_fmix");
|
450
631
|
rb_define_alias(singleton, "str_hash", "murmur3_32_str_hash");
|
632
|
+
rb_define_alias(singleton, "str_digest", "murmur3_32_str_digest");
|
633
|
+
rb_define_alias(singleton, "str_hexdigest", "murmur3_32_str_hexdigest");
|
634
|
+
rb_define_alias(singleton, "str_base64digest", "murmur3_32_str_base64digest");
|
451
635
|
rb_define_alias(singleton, "int32_hash", "murmur3_32_int32_hash");
|
452
636
|
rb_define_alias(singleton, "int64_hash", "murmur3_32_int64_hash");
|
453
637
|
|
454
638
|
|
455
639
|
rb_define_method(mod_murmur128, "murmur3_128_fmix", rb_fmix64, 1);
|
456
640
|
rb_define_method(mod_murmur128, "murmur3_128_str_hash", rb_murmur3_128_str_hash, -1);
|
641
|
+
rb_define_method(mod_murmur128, "murmur3_128_str_digest", rb_murmur3_128_str_digest, -1);
|
642
|
+
rb_define_method(mod_murmur128, "murmur3_128_str_hexdigest", rb_murmur3_128_str_hexdigest, -1);
|
643
|
+
rb_define_method(mod_murmur128, "murmur3_128_str_base64digest", rb_murmur3_128_str_base64digest, -1);
|
457
644
|
rb_define_method(mod_murmur128, "murmur3_128_int32_hash", rb_murmur3_128_int32_hash, -1);
|
458
645
|
rb_define_method(mod_murmur128, "murmur3_128_int64_hash", rb_murmur3_128_int64_hash, -1);
|
459
646
|
|
@@ -461,6 +648,9 @@ Init_native_murmur() {
|
|
461
648
|
singleton = rb_singleton_class(mod_murmur128);
|
462
649
|
rb_define_alias(singleton, "fmix", "murmur3_128_fmix");
|
463
650
|
rb_define_alias(singleton, "str_hash", "murmur3_128_str_hash");
|
651
|
+
rb_define_alias(singleton, "str_digest", "murmur3_128_str_digest");
|
652
|
+
rb_define_alias(singleton, "str_hexdigest", "murmur3_128_str_hexdigest");
|
653
|
+
rb_define_alias(singleton, "str_base64digest", "murmur3_128_str_base64digest");
|
464
654
|
rb_define_alias(singleton, "int32_hash", "murmur3_128_int32_hash");
|
465
655
|
rb_define_alias(singleton, "int64_hash", "murmur3_128_int64_hash");
|
466
656
|
|
data/lib/murmurhash3/pure_ruby.rb
CHANGED
@@ -53,10 +53,25 @@ module MurmurHash3
|
|
53
53
|
str_hash([i].pack("Q<"), seed)
|
54
54
|
end
|
55
55
|
|
56
|
+
def murmur3_32_str_digest(str, seed=0)
|
57
|
+
[str_hash(str, seed)].pack("V")
|
58
|
+
end
|
59
|
+
|
60
|
+
def murmur3_32_str_hexdigest(str, seed=0)
|
61
|
+
[str_hash(str, seed)].pack("V").unpack("H*")[0]
|
62
|
+
end
|
63
|
+
|
64
|
+
def murmur3_32_str_base64digest(str, seed=0)
|
65
|
+
[[str_hash(str, seed)].pack("V")].pack("m").chomp!
|
66
|
+
end
|
67
|
+
|
56
68
|
class << self
|
57
69
|
include MurmurHash3::PureRuby32
|
58
70
|
alias fmix murmur3_32_fmix
|
59
71
|
alias str_hash murmur3_32_str_hash
|
72
|
+
alias str_digest murmur3_32_str_digest
|
73
|
+
alias str_hexdigest murmur3_32_str_hexdigest
|
74
|
+
alias str_base64digest murmur3_32_str_base64digest
|
60
75
|
alias int32_hash murmur3_32_int32_hash
|
61
76
|
alias int64_hash murmur3_32_int64_hash
|
62
77
|
end
|
@@ -144,10 +159,25 @@ module MurmurHash3
|
|
144
159
|
str_hash([i].pack("Q<"), seed)
|
145
160
|
end
|
146
161
|
|
162
|
+
def murmur3_128_str_digest(str, seed=0)
|
163
|
+
str_hash(str, seed).pack("V4")
|
164
|
+
end
|
165
|
+
|
166
|
+
def murmur3_128_str_hexdigest(str, seed=0)
|
167
|
+
str_hash(str, seed).pack("V4").unpack("H*")[0]
|
168
|
+
end
|
169
|
+
|
170
|
+
def murmur3_128_str_base64digest(str, seed=0)
|
171
|
+
[str_hash(str, seed).pack("V4")].pack('m').chomp!
|
172
|
+
end
|
173
|
+
|
147
174
|
class << self
|
148
175
|
include MurmurHash3::PureRuby128
|
149
176
|
alias fmix murmur3_128_fmix
|
150
177
|
alias str_hash murmur3_128_str_hash
|
178
|
+
alias str_digest murmur3_128_str_digest
|
179
|
+
alias str_hexdigest murmur3_128_str_hexdigest
|
180
|
+
alias str_base64digest murmur3_128_str_base64digest
|
151
181
|
alias int32_hash murmur3_128_int32_hash
|
152
182
|
alias int64_hash murmur3_128_int64_hash
|
153
183
|
end
|
data/lib/murmurhash3/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: murmurhash3
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3.1
|
5
4
|
prerelease:
|
5
|
+
version: 0.1.4
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Sokolov Yura 'funny-falcon'
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2014-01-15 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: implementation of murmur3 hashing function
|
15
15
|
email:
|
@@ -34,20 +34,20 @@ require_paths:
|
|
34
34
|
- lib
|
35
35
|
- ext
|
36
36
|
required_ruby_version: !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
37
38
|
requirements:
|
38
39
|
- - ! '>='
|
39
40
|
- !ruby/object:Gem::Version
|
40
41
|
version: '0'
|
41
|
-
none: false
|
42
42
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
43
44
|
requirements:
|
44
45
|
- - ! '>='
|
45
46
|
- !ruby/object:Gem::Version
|
46
47
|
version: '0'
|
47
|
-
none: false
|
48
48
|
requirements: []
|
49
49
|
rubyforge_project:
|
50
|
-
rubygems_version: 1.8.
|
50
|
+
rubygems_version: 1.8.23
|
51
51
|
signing_key:
|
52
52
|
specification_version: 3
|
53
53
|
summary: implements mumur3 hashing function
|