digest-murmurhash 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/Rakefile +1 -2
- data/digest-murmurhash.gemspec +5 -8
- data/ext/digest/murmurhash/extconf.rb +0 -3
- data/ext/digest/murmurhash/murmurhash.c +121 -202
- data/ext/digest/murmurhash/murmurhash.h +23 -55
- data/ext/digest/murmurhash/murmurhash1.c +35 -47
- data/ext/digest/murmurhash/murmurhash1.h +0 -2
- data/ext/digest/murmurhash/murmurhash2.c +35 -48
- data/ext/digest/murmurhash/murmurhash2.h +0 -2
- data/ext/digest/murmurhash/murmurhash2a.c +41 -62
- data/ext/digest/murmurhash/murmurhash2a.h +0 -2
- data/ext/digest/murmurhash/murmurhash64a.c +40 -52
- data/ext/digest/murmurhash/murmurhash64a.h +0 -3
- data/ext/digest/murmurhash/murmurhash64b.c +56 -68
- data/ext/digest/murmurhash/murmurhash64b.h +0 -2
- data/ext/digest/murmurhash/murmurhash_aligned2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -2
- data/ext/digest/murmurhash/murmurhash_neutral2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -2
- data/lib/digest/murmurhash.rb +72 -6
- data/spec/digest_spec.rb +20 -19
- data/spec/exception_spec.rb +4 -4
- data/spec/mem_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +26 -27
- data/lib/digest/murmurhash/version.rb +0 -5
@@ -4,10 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur64a_finish(VALUE self);
|
7
|
-
VALUE murmur64a_to_i(VALUE self);
|
8
7
|
VALUE murmur64a_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur64a_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur64a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH64A_INCLUDED */
|
13
|
-
|
@@ -7,87 +7,75 @@
|
|
7
7
|
/*
 * Read four bytes starting at p into a uint32_t, keeping the platform's
 * native byte order.  The copy is done bytewise so that no uint32_t
 * pointer is ever dereferenced at an address that may be misaligned.
 */
static uint32_t
murmur64b_load32(const unsigned char * p)
{
  uint32_t v;
  unsigned char * dst = (unsigned char *)&v;

  dst[0] = p[0];
  dst[1] = p[1];
  dst[2] = p[2];
  dst[3] = p[3];
  return v;
}

/*
 * Hash `len` bytes at `key` into a 64-bit value using two 32-bit lanes.
 *
 * key  - input bytes; any alignment is accepted.
 * len  - number of bytes to hash.
 * seed - low 32 bits seed lane h1 (xor-ed with len), high 32 bits seed
 *        lane h2.
 *
 * Returns h1 in the upper 32 bits and h2 in the lower 32 bits.
 */
uint64_t
murmur_hash_process64b(const char * key, uint32_t len, uint64_t seed)
{
  const uint32_t m = MURMURHASH_MAGIC;
  const int r = 24;

  uint32_t h1 = (uint32_t)(seed) ^ len;
  uint32_t h2 = (uint32_t)(seed >> 32);

  const unsigned char * data = (const unsigned char *)key;

  /* Consume 8 bytes per round: first word into h1, second into h2. */
  while(len >= 8) {
    uint32_t k1 = murmur64b_load32(data);
    uint32_t k2 = murmur64b_load32(data + 4);

    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;

    k2 *= m; k2 ^= k2 >> r; k2 *= m;
    h2 *= m; h2 ^= k2;

    data += 8;
    len -= 8;
  }

  /* One remaining full word is mixed into h1 only. */
  if(len >= 4) {
    uint32_t k1 = murmur64b_load32(data);

    k1 *= m; k1 ^= k1 >> r; k1 *= m;
    h1 *= m; h1 ^= k1;

    data += 4;
    len -= 4;
  }

  /* Up to three trailing bytes are folded into h2. */
  switch(len) {
  case 3: h2 ^= (uint32_t)data[2] << 16; /* fallthrough */
  case 2: h2 ^= (uint32_t)data[1] << 8;  /* fallthrough */
  case 1: h2 ^= data[0];
          h2 *= m;
          break;
  default:
          break;
  }

  /* Final cross-mixing of the two lanes. */
  h1 ^= h2 >> 18; h1 *= m;
  h2 ^= h1 >> 22; h2 *= m;
  h1 ^= h2 >> 17; h1 *= m;
  h2 ^= h1 >> 19; h2 *= m;

  return ((uint64_t)h1 << 32) | h2;
}
|
55
55
|
|
56
56
|
VALUE
|
57
57
|
murmur64b_finish(VALUE self)
|
58
58
|
{
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
h = _murmur_finish64(self, murmur_hash_process64b);
|
63
|
-
ASSINE_BY_ENDIAN_64(digest, h);
|
64
|
-
return rb_str_new((const char*) digest, 8);
|
65
|
-
}
|
59
|
+
uint8_t digest[8];
|
60
|
+
uint64_t h;
|
66
61
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
return ULL2NUM(_murmur_finish64(self, murmur_hash_process64b));
|
62
|
+
h = _murmur_finish64(self, murmur_hash_process64b);
|
63
|
+
assign_by_endian_64(digest, h);
|
64
|
+
return rb_str_new((const char*) digest, 8);
|
71
65
|
}
|
72
66
|
|
73
67
|
VALUE
|
74
68
|
murmur64b_s_digest(int argc, VALUE *argv, VALUE klass)
|
75
69
|
{
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
}
|
82
|
-
|
83
|
-
VALUE
|
84
|
-
murmur64b_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
85
|
-
{
|
86
|
-
return hexencode_str_new(murmur64b_s_digest(argc, argv, klass));
|
70
|
+
uint8_t digest[8];
|
71
|
+
uint64_t h;
|
72
|
+
h = _murmur_s_digest64(argc, argv, klass, murmur_hash_process64b);
|
73
|
+
assign_by_endian_64(digest, h);
|
74
|
+
return rb_str_new((const char*) digest, 8);
|
87
75
|
}
|
88
76
|
|
89
77
|
VALUE
|
90
78
|
murmur64b_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
91
79
|
{
|
92
|
-
|
80
|
+
return ULL2NUM(_murmur_s_digest64(argc, argv, klass, murmur_hash_process64b));
|
93
81
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur64b_finish(VALUE self);
|
7
|
-
VALUE murmur64b_to_i(VALUE self);
|
8
7
|
VALUE murmur64b_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur64b_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur64b_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH64B_INCLUDED */
|
@@ -7,81 +7,69 @@
|
|
7
7
|
/*
 * Hash `len` bytes at `key` into a 32-bit value.
 *
 * The input is consumed one byte at a time and assembled into
 * little-endian 32-bit words, so `key` may have any alignment.
 *
 * key  - input bytes.
 * len  - number of bytes to hash.
 * seed - initial hash state (xor-ed with len).
 */
uint32_t
murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h = seed ^ len;

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4) {
    uint32_t k;

    /*
     * Widen each byte to uint32_t before shifting: an unsigned char is
     * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
     * would overflow it.
     */
    k  = (uint32_t)data[0];
    k |= (uint32_t)data[1] << 8;
    k |= (uint32_t)data[2] << 16;
    k |= (uint32_t)data[3] << 24;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Fold in the 1-3 trailing bytes, if any. */
  switch(len) {
  case 3: h ^= (uint32_t)data[2] << 16; /* fallthrough */
  case 2: h ^= (uint32_t)data[1] << 8;  /* fallthrough */
  case 1: h ^= data[0];
          h *= m;
          break;
  default:
          break;
  }

  /* Final avalanche. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
|
49
49
|
|
50
50
|
VALUE
|
51
51
|
murmur_aligned2_finish(VALUE self)
|
52
52
|
{
|
53
|
-
|
54
|
-
|
53
|
+
uint8_t digest[4];
|
54
|
+
uint32_t h;
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
60
|
-
|
61
|
-
VALUE
|
62
|
-
murmur_aligned2_to_i(VALUE self)
|
63
|
-
{
|
64
|
-
return ULL2NUM(_murmur_finish32(self, murmur_hash_process_aligned2));
|
56
|
+
h = _murmur_finish32(self, murmur_hash_process_aligned2);
|
57
|
+
assign_by_endian_32(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
65
59
|
}
|
66
60
|
|
67
61
|
VALUE
|
68
62
|
murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
63
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
murmur_aligned2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
-
{
|
80
|
-
return hexencode_str_new(murmur_aligned2_s_digest(argc, argv, klass));
|
64
|
+
uint8_t digest[4];
|
65
|
+
uint64_t h;
|
66
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
|
67
|
+
assign_by_endian_32(digest, h);
|
68
|
+
return rb_str_new((const char*) digest, 4);
|
81
69
|
}
|
82
70
|
|
83
71
|
VALUE
|
84
72
|
murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
73
|
{
|
86
|
-
|
74
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2));
|
87
75
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur_aligned2_finish(VALUE self);
|
7
|
-
VALUE murmur_aligned2_to_i(VALUE self);
|
8
7
|
VALUE murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur_aligned2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH_ALIGNED2_INCLUDED */
|
@@ -7,81 +7,69 @@
|
|
7
7
|
/*
 * Hash `len` bytes at `key` into a 32-bit value.
 *
 * Words are assembled from individual bytes in little-endian order, so
 * the result does not depend on the host's byte order or on the
 * alignment of `key`.
 *
 * key  - input bytes.
 * len  - number of bytes to hash.
 * seed - initial hash state (xor-ed with len).
 */
uint32_t
murmur_hash_process_neutral2(const char * key, uint32_t len, uint32_t seed)
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  uint32_t h = seed ^ len;

  const unsigned char * data = (const unsigned char *)key;

  while(len >= 4) {
    uint32_t k;

    /*
     * Widen each byte to uint32_t before shifting: an unsigned char is
     * promoted to (signed) int, and shifting a byte >= 0x80 left by 24
     * would overflow it.
     */
    k  = (uint32_t)data[0];
    k |= (uint32_t)data[1] << 8;
    k |= (uint32_t)data[2] << 16;
    k |= (uint32_t)data[3] << 24;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Fold in the 1-3 trailing bytes, if any. */
  switch(len) {
  case 3: h ^= (uint32_t)data[2] << 16; /* fallthrough */
  case 2: h ^= (uint32_t)data[1] << 8;  /* fallthrough */
  case 1: h ^= data[0];
          h *= m;
          break;
  default:
          break;
  }

  /* Final avalanche. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
|
49
49
|
|
50
50
|
VALUE
|
51
51
|
murmur_neutral2_finish(VALUE self)
|
52
52
|
{
|
53
|
-
|
54
|
-
|
53
|
+
uint8_t digest[4];
|
54
|
+
uint32_t h;
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
60
|
-
|
61
|
-
VALUE
|
62
|
-
murmur_neutral2_to_i(VALUE self)
|
63
|
-
{
|
64
|
-
return ULL2NUM(_murmur_finish32(self, murmur_hash_process_neutral2));
|
56
|
+
h = _murmur_finish32(self, murmur_hash_process_neutral2);
|
57
|
+
assign_by_endian_32(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
65
59
|
}
|
66
60
|
|
67
61
|
VALUE
|
68
62
|
murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
63
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
-
{
|
80
|
-
return hexencode_str_new(murmur_neutral2_s_digest(argc, argv, klass));
|
64
|
+
uint8_t digest[4];
|
65
|
+
uint64_t h;
|
66
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
|
67
|
+
assign_by_endian_32(digest, h);
|
68
|
+
return rb_str_new((const char*) digest, 4);
|
81
69
|
}
|
82
70
|
|
83
71
|
VALUE
|
84
72
|
murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
73
|
{
|
86
|
-
|
74
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2));
|
87
75
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur_neutral2_finish(VALUE self);
|
7
|
-
VALUE murmur_neutral2_to_i(VALUE self);
|
8
7
|
VALUE murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH_NEUTRAL2_INCLUDED */
|
data/lib/digest/murmurhash.rb
CHANGED
@@ -1,7 +1,73 @@
|
|
1
|
-
require "digest/
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
require "digest/simple"
|
2
|
+
|
3
|
+
module Digest
  # Common base of the MurmurHash digest classes.  Holds the seed and
  # leaves the digest width to the Size32 / Size64 mixins included by the
  # concrete subclasses.  `finish` is not defined in this file; it is
  # presumably supplied by the C extension required at the bottom.
  class MurmurHash < Simple
    # Starts from the including class's DEFAULT_SEED, then runs the
    # superclass initializer.
    def initialize
      @seed = self.class::DEFAULT_SEED
      super
    end

    # The seed currently in use, as a byte string.
    attr_reader :seed

    # Replaces the seed.
    #
    # s - byte string whose bytesize must equal digest_length.
    #
    # Raises ArgumentError when the seed has any other size.
    def seed=(s)
      if s.bytesize != digest_length
        fail ArgumentError, "seed string should be #{digest_length * 8} bits (#{digest_length} bytes)"
      end
      @seed = s
    end

    # Mixin for the variants that produce a 4-byte digest.
    module Size32
      DEFAULT_SEED = "\x00\x00\x00\x00".encode('ASCII-8BIT')

      # Digest size in bytes.
      def digest_length
        4
      end

      # The digest bytes read as one native-endian unsigned int.
      def to_i
        finish.unpack("I")[0]
      end
    end

    # Mixin for the variants that produce an 8-byte digest.
    module Size64
      DEFAULT_SEED = "\x00\x00\x00\x00\x00\x00\x00\x00".encode('ASCII-8BIT')

      # Digest size in bytes.
      def digest_length
        8
      end

      # The digest bytes read as one native-endian unsigned 64-bit
      # integer.  "Q" is the 64-bit directive; "L" would read only the
      # first four bytes.
      def to_i
        finish.unpack("Q")[0]
      end
    end
  end

  class MurmurHash1 < MurmurHash
    include Size32
  end

  class MurmurHash2 < MurmurHash
    include Size32
  end

  class MurmurHash2A < MurmurHash
    include Size32
  end

  class MurmurHash64A < MurmurHash
    include Size64
  end

  class MurmurHash64B < MurmurHash
    include Size64
  end

  class MurmurHashAligned2 < MurmurHash
    include Size32
  end

  class MurmurHashNeutral2 < MurmurHash
    include Size32
  end
end
|
7
|
-
|
72
|
+
|
73
|
+
require "digest/murmurhash/murmurhash"
|