digest-murmurhash 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/Rakefile +1 -2
- data/digest-murmurhash.gemspec +5 -8
- data/ext/digest/murmurhash/extconf.rb +0 -3
- data/ext/digest/murmurhash/murmurhash.c +121 -202
- data/ext/digest/murmurhash/murmurhash.h +23 -55
- data/ext/digest/murmurhash/murmurhash1.c +35 -47
- data/ext/digest/murmurhash/murmurhash1.h +0 -2
- data/ext/digest/murmurhash/murmurhash2.c +35 -48
- data/ext/digest/murmurhash/murmurhash2.h +0 -2
- data/ext/digest/murmurhash/murmurhash2a.c +41 -62
- data/ext/digest/murmurhash/murmurhash2a.h +0 -2
- data/ext/digest/murmurhash/murmurhash64a.c +40 -52
- data/ext/digest/murmurhash/murmurhash64a.h +0 -3
- data/ext/digest/murmurhash/murmurhash64b.c +56 -68
- data/ext/digest/murmurhash/murmurhash64b.h +0 -2
- data/ext/digest/murmurhash/murmurhash_aligned2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -2
- data/ext/digest/murmurhash/murmurhash_neutral2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -2
- data/lib/digest/murmurhash.rb +72 -6
- data/spec/digest_spec.rb +20 -19
- data/spec/exception_spec.rb +4 -4
- data/spec/mem_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +26 -27
- data/lib/digest/murmurhash/version.rb +0 -5
@@ -4,10 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur64a_finish(VALUE self);
|
7
|
-
VALUE murmur64a_to_i(VALUE self);
|
8
7
|
VALUE murmur64a_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur64a_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur64a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH64A_INCLUDED */
|
13
|
-
|
@@ -7,87 +7,75 @@
|
|
7
7
|
uint64_t
|
8
8
|
murmur_hash_process64b(const char * key, uint32_t len, uint64_t seed)
|
9
9
|
{
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
}
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
const int r = 24;
|
12
|
+
|
13
|
+
uint32_t h1 = (uint32_t)(seed) ^ len;
|
14
|
+
uint32_t h2 = (uint32_t)(seed >> 32);
|
15
|
+
|
16
|
+
const uint32_t * data = (const uint32_t *)key;
|
17
|
+
|
18
|
+
while(len >= 8) {
|
19
|
+
uint32_t k1 = *data++;
|
20
|
+
k1 *= m; k1 ^= k1 >> r; k1 *= m;
|
21
|
+
h1 *= m; h1 ^= k1;
|
22
|
+
len -= 4;
|
23
|
+
|
24
|
+
uint32_t k2 = *data++;
|
25
|
+
k2 *= m; k2 ^= k2 >> r; k2 *= m;
|
26
|
+
h2 *= m; h2 ^= k2;
|
27
|
+
len -= 4;
|
28
|
+
}
|
29
|
+
|
30
|
+
if(len >= 4) {
|
31
|
+
uint32_t k1 = *data++;
|
32
|
+
k1 *= m; k1 ^= k1 >> r; k1 *= m;
|
33
|
+
h1 *= m; h1 ^= k1;
|
34
|
+
len -= 4;
|
35
|
+
}
|
36
|
+
|
37
|
+
switch(len) {
|
38
|
+
case 3: h2 ^= ((unsigned char*)data)[2] << 16;
|
39
|
+
case 2: h2 ^= ((unsigned char*)data)[1] << 8;
|
40
|
+
case 1: h2 ^= ((unsigned char*)data)[0];
|
41
|
+
h2 *= m;
|
42
|
+
};
|
43
|
+
|
44
|
+
h1 ^= h2 >> 18; h1 *= m;
|
45
|
+
h2 ^= h1 >> 22; h2 *= m;
|
46
|
+
h1 ^= h2 >> 17; h1 *= m;
|
47
|
+
h2 ^= h1 >> 19; h2 *= m;
|
48
|
+
|
49
|
+
uint64_t h = h1;
|
50
|
+
|
51
|
+
h = (h << 32) | h2;
|
52
|
+
|
53
|
+
return h;
|
54
|
+
}
|
55
55
|
|
56
56
|
VALUE
|
57
57
|
murmur64b_finish(VALUE self)
|
58
58
|
{
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
h = _murmur_finish64(self, murmur_hash_process64b);
|
63
|
-
ASSINE_BY_ENDIAN_64(digest, h);
|
64
|
-
return rb_str_new((const char*) digest, 8);
|
65
|
-
}
|
59
|
+
uint8_t digest[8];
|
60
|
+
uint64_t h;
|
66
61
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
return ULL2NUM(_murmur_finish64(self, murmur_hash_process64b));
|
62
|
+
h = _murmur_finish64(self, murmur_hash_process64b);
|
63
|
+
assign_by_endian_64(digest, h);
|
64
|
+
return rb_str_new((const char*) digest, 8);
|
71
65
|
}
|
72
66
|
|
73
67
|
VALUE
|
74
68
|
murmur64b_s_digest(int argc, VALUE *argv, VALUE klass)
|
75
69
|
{
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
}
|
82
|
-
|
83
|
-
VALUE
|
84
|
-
murmur64b_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
85
|
-
{
|
86
|
-
return hexencode_str_new(murmur64b_s_digest(argc, argv, klass));
|
70
|
+
uint8_t digest[8];
|
71
|
+
uint64_t h;
|
72
|
+
h = _murmur_s_digest64(argc, argv, klass, murmur_hash_process64b);
|
73
|
+
assign_by_endian_64(digest, h);
|
74
|
+
return rb_str_new((const char*) digest, 8);
|
87
75
|
}
|
88
76
|
|
89
77
|
VALUE
|
90
78
|
murmur64b_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
91
79
|
{
|
92
|
-
|
80
|
+
return ULL2NUM(_murmur_s_digest64(argc, argv, klass, murmur_hash_process64b));
|
93
81
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur64b_finish(VALUE self);
|
7
|
-
VALUE murmur64b_to_i(VALUE self);
|
8
7
|
VALUE murmur64b_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur64b_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur64b_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH64B_INCLUDED */
|
@@ -7,81 +7,69 @@
|
|
7
7
|
/*
 * 32-bit MurmurHash2 computed byte-by-byte, so the result does not depend on
 * the alignment of `key` or on host byte order.
 *
 * key  - input bytes
 * len  - number of bytes in key
 * seed - initial hash state (xor-ed with len)
 *
 * Each input byte is widened to uint32_t before shifting: shifting a promoted
 * `int` left by 24 would overflow into the sign bit for bytes >= 0x80.
 */
uint32_t
murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
{
	const uint32_t m = 0x5bd1e995;
	const int r = 24;

	uint32_t h = seed ^ len;

	const unsigned char * data = (const unsigned char *)key;

	/* Mix in the input four bytes at a time, assembled little-endian. */
	while(len >= 4) {
		uint32_t k;

		k  = (uint32_t)data[0];
		k |= (uint32_t)data[1] << 8;
		k |= (uint32_t)data[2] << 16;
		k |= (uint32_t)data[3] << 24;

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		data += 4;
		len -= 4;
	}

	/* Fold in the 1-3 trailing bytes, if any. */
	switch(len) {
	case 3: h ^= (uint32_t)data[2] << 16;
		/* fallthrough */
	case 2: h ^= (uint32_t)data[1] << 8;
		/* fallthrough */
	case 1: h ^= (uint32_t)data[0];
		h *= m;
		break;
	default:
		break;
	}

	/* Final avalanche. */
	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;

	return h;
}
|
49
49
|
|
50
50
|
VALUE
|
51
51
|
murmur_aligned2_finish(VALUE self)
|
52
52
|
{
|
53
|
-
|
54
|
-
|
53
|
+
uint8_t digest[4];
|
54
|
+
uint32_t h;
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
60
|
-
|
61
|
-
VALUE
|
62
|
-
murmur_aligned2_to_i(VALUE self)
|
63
|
-
{
|
64
|
-
return ULL2NUM(_murmur_finish32(self, murmur_hash_process_aligned2));
|
56
|
+
h = _murmur_finish32(self, murmur_hash_process_aligned2);
|
57
|
+
assign_by_endian_32(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
65
59
|
}
|
66
60
|
|
67
61
|
VALUE
|
68
62
|
murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
63
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
murmur_aligned2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
-
{
|
80
|
-
return hexencode_str_new(murmur_aligned2_s_digest(argc, argv, klass));
|
64
|
+
uint8_t digest[4];
|
65
|
+
uint64_t h;
|
66
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
|
67
|
+
assign_by_endian_32(digest, h);
|
68
|
+
return rb_str_new((const char*) digest, 4);
|
81
69
|
}
|
82
70
|
|
83
71
|
VALUE
|
84
72
|
murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
73
|
{
|
86
|
-
|
74
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2));
|
87
75
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur_aligned2_finish(VALUE self);
|
7
|
-
VALUE murmur_aligned2_to_i(VALUE self);
|
8
7
|
VALUE murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur_aligned2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH_ALIGNED2_INCLUDED */
|
@@ -7,81 +7,69 @@
|
|
7
7
|
/*
 * Endian- and alignment-neutral 32-bit MurmurHash2: the input is consumed one
 * byte at a time and assembled little-endian, so every host produces the same
 * value for the same bytes.
 *
 * key  - input bytes
 * len  - number of bytes in key
 * seed - initial hash state (xor-ed with len)
 *
 * Each input byte is widened to uint32_t before shifting: shifting a promoted
 * `int` left by 24 would overflow into the sign bit for bytes >= 0x80.
 */
uint32_t
murmur_hash_process_neutral2(const char * key, uint32_t len, uint32_t seed)
{
	const uint32_t m = 0x5bd1e995;
	const int r = 24;

	uint32_t h = seed ^ len;

	const unsigned char * data = (const unsigned char *)key;

	/* Mix in the input four bytes at a time. */
	while(len >= 4) {
		uint32_t k;

		k  = (uint32_t)data[0];
		k |= (uint32_t)data[1] << 8;
		k |= (uint32_t)data[2] << 16;
		k |= (uint32_t)data[3] << 24;

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		data += 4;
		len -= 4;
	}

	/* Fold in the 1-3 trailing bytes, if any. */
	switch(len) {
	case 3: h ^= (uint32_t)data[2] << 16;
		/* fallthrough */
	case 2: h ^= (uint32_t)data[1] << 8;
		/* fallthrough */
	case 1: h ^= (uint32_t)data[0];
		h *= m;
		break;
	default:
		break;
	}

	/* Final avalanche. */
	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;

	return h;
}
|
49
49
|
|
50
50
|
VALUE
|
51
51
|
murmur_neutral2_finish(VALUE self)
|
52
52
|
{
|
53
|
-
|
54
|
-
|
53
|
+
uint8_t digest[4];
|
54
|
+
uint32_t h;
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
60
|
-
|
61
|
-
VALUE
|
62
|
-
murmur_neutral2_to_i(VALUE self)
|
63
|
-
{
|
64
|
-
return ULL2NUM(_murmur_finish32(self, murmur_hash_process_neutral2));
|
56
|
+
h = _murmur_finish32(self, murmur_hash_process_neutral2);
|
57
|
+
assign_by_endian_32(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
65
59
|
}
|
66
60
|
|
67
61
|
VALUE
|
68
62
|
murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
63
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
-
{
|
80
|
-
return hexencode_str_new(murmur_neutral2_s_digest(argc, argv, klass));
|
64
|
+
uint8_t digest[4];
|
65
|
+
uint64_t h;
|
66
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
|
67
|
+
assign_by_endian_32(digest, h);
|
68
|
+
return rb_str_new((const char*) digest, 4);
|
81
69
|
}
|
82
70
|
|
83
71
|
VALUE
|
84
72
|
murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
73
|
{
|
86
|
-
|
74
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2));
|
87
75
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur_neutral2_finish(VALUE self);
|
7
|
-
VALUE murmur_neutral2_to_i(VALUE self);
|
8
7
|
VALUE murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH_NEUTRAL2_INCLUDED */
|
data/lib/digest/murmurhash.rb
CHANGED
@@ -1,7 +1,73 @@
|
|
1
|
-
require "digest/
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
1
|
+
require "digest/simple"
|
2
|
+
|
3
|
+
module Digest
  # Common base for the MurmurHash digest classes.  Holds the seed as a binary
  # string whose byte length must equal the digest length of the concrete
  # class (4 bytes for Size32 classes, 8 bytes for Size64 classes).
  class MurmurHash < Simple
    def initialize
      # DEFAULT_SEED comes from whichever size module the subclass includes.
      @seed = self.class::DEFAULT_SEED
      super
    end

    # Current seed as a binary string.
    def seed
      @seed
    end

    # Replace the seed.  Raises ArgumentError unless the string is exactly
    # digest_length bytes long.
    def seed=(s)
      fail ArgumentError, "seed string should be #{digest_length * 8} bit chars" if s.bytesize != digest_length
      @seed = s
    end

    # Mixin for variants producing a 4-byte digest.
    module Size32
      DEFAULT_SEED = "\x00\x00\x00\x00".encode('ASCII-8BIT')

      def digest_length
        4
      end

      # Digest as an Integer: the 4 digest bytes read as a native-endian
      # unsigned 32-bit value.
      def to_i
        finish.unpack("I")[0]
      end
    end

    # Mixin for variants producing an 8-byte digest.
    module Size64
      DEFAULT_SEED = "\x00\x00\x00\x00\x00\x00\x00\x00".encode('ASCII-8BIT')

      def digest_length
        8
      end

      # Digest as an Integer: all 8 digest bytes read as a native-endian
      # unsigned 64-bit value ("Q"); "L" would only decode the first 4 bytes.
      def to_i
        finish.unpack("Q")[0]
      end
    end
  end

  class MurmurHash1 < MurmurHash
    include Size32
  end

  class MurmurHash2 < MurmurHash
    include Size32
  end

  class MurmurHash2A < MurmurHash
    include Size32
  end

  class MurmurHash64A < MurmurHash
    include Size64
  end

  class MurmurHash64B < MurmurHash
    include Size64
  end

  class MurmurHashAligned2 < MurmurHash
    include Size32
  end

  class MurmurHashNeutral2 < MurmurHash
    include Size32
  end
end
|
7
|
-
|
72
|
+
|
73
|
+
require "digest/murmurhash/murmurhash"
|