digest-murmurhash 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/Rakefile +1 -2
- data/digest-murmurhash.gemspec +5 -8
- data/ext/digest/murmurhash/extconf.rb +0 -3
- data/ext/digest/murmurhash/murmurhash.c +121 -202
- data/ext/digest/murmurhash/murmurhash.h +23 -55
- data/ext/digest/murmurhash/murmurhash1.c +35 -47
- data/ext/digest/murmurhash/murmurhash1.h +0 -2
- data/ext/digest/murmurhash/murmurhash2.c +35 -48
- data/ext/digest/murmurhash/murmurhash2.h +0 -2
- data/ext/digest/murmurhash/murmurhash2a.c +41 -62
- data/ext/digest/murmurhash/murmurhash2a.h +0 -2
- data/ext/digest/murmurhash/murmurhash64a.c +40 -52
- data/ext/digest/murmurhash/murmurhash64a.h +0 -3
- data/ext/digest/murmurhash/murmurhash64b.c +56 -68
- data/ext/digest/murmurhash/murmurhash64b.h +0 -2
- data/ext/digest/murmurhash/murmurhash_aligned2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -2
- data/ext/digest/murmurhash/murmurhash_neutral2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -2
- data/lib/digest/murmurhash.rb +72 -6
- data/spec/digest_spec.rb +20 -19
- data/spec/exception_spec.rb +4 -4
- data/spec/mem_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +26 -27
- data/lib/digest/murmurhash/version.rb +0 -5
@@ -5,77 +5,65 @@
|
|
5
5
|
#include "murmurhash1.h"
|
6
6
|
|
7
7
|
/*
 * One mixing step: multiply the running hash by the multiplier, then XOR in
 * the hash shifted right by `r`.
 *
 * Expands in place and relies on variables named `h` (running hash) and `m`
 * (multiplier) being visible at the call site. The shift amount is
 * parenthesized so that an expression argument is applied as a whole.
 */
#define murmur1(r) do { \
    h *= m; \
    h ^= h >> (r); \
} while(0)
|
11
11
|
|
12
12
|
static uint32_t
|
13
13
|
murmur_hash_process1(const char *data, uint32_t length, uint32_t seed)
|
14
14
|
{
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
16
|
+
const uint8_t r = 16;
|
17
|
+
uint32_t h;
|
18
18
|
|
19
|
-
|
19
|
+
h = seed ^ (length * m);
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
while (4 <= length) {
|
22
|
+
h += *(uint32_t*)data;
|
23
|
+
murmur1(r);
|
24
|
+
data += 4;
|
25
|
+
length -= 4;
|
26
|
+
}
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
28
|
+
switch (length) {
|
29
|
+
case 3:
|
30
|
+
h += data[2] << 16;
|
31
|
+
case 2:
|
32
|
+
h += data[1] << 8;
|
33
|
+
case 1:
|
34
|
+
h += data[0];
|
35
|
+
murmur1(r);
|
36
|
+
}
|
37
37
|
|
38
|
-
|
39
|
-
|
38
|
+
murmur1(10);
|
39
|
+
murmur1(17);
|
40
40
|
|
41
|
-
|
41
|
+
return h;
|
42
42
|
}
|
43
43
|
|
44
44
|
VALUE
|
45
45
|
murmur1_finish(VALUE self)
|
46
46
|
{
|
47
|
-
|
48
|
-
|
47
|
+
uint8_t digest[4];
|
48
|
+
uint64_t h;
|
49
49
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
}
|
54
|
-
|
55
|
-
VALUE
|
56
|
-
murmur1_to_i(VALUE self)
|
57
|
-
{
|
58
|
-
return UINT2NUM(_murmur_finish32(self, murmur_hash_process1));
|
50
|
+
h = _murmur_finish32(self, murmur_hash_process1);
|
51
|
+
assign_by_endian_32(digest, h);
|
52
|
+
return rb_str_new((const char*) digest, 4);
|
59
53
|
}
|
60
54
|
|
61
55
|
VALUE
|
62
56
|
murmur1_s_digest(int argc, VALUE *argv, VALUE klass)
|
63
57
|
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
}
|
70
|
-
|
71
|
-
VALUE
|
72
|
-
murmur1_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
73
|
-
{
|
74
|
-
return hexencode_str_new(murmur1_s_digest(argc, argv, klass));
|
58
|
+
uint8_t digest[4];
|
59
|
+
uint32_t h;
|
60
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process1);
|
61
|
+
assign_by_endian_32(digest, h);
|
62
|
+
return rb_str_new((const char*) digest, 4);
|
75
63
|
}
|
76
64
|
|
77
65
|
VALUE
|
78
66
|
murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
79
67
|
{
|
80
|
-
|
68
|
+
return UINT2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process1));
|
81
69
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur1_finish(VALUE self);
|
7
|
-
VALUE murmur1_to_i(VALUE self);
|
8
7
|
VALUE murmur1_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur1_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH1_INCLUDED */
|
@@ -7,75 +7,62 @@
|
|
7
7
|
static uint32_t
|
8
8
|
murmur_hash_process2(const char *data, uint32_t length, uint32_t seed)
|
9
9
|
{
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
const uint8_t r = 24;
|
12
|
+
uint32_t h, k;
|
13
13
|
|
14
|
-
|
14
|
+
h = seed ^ length;
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
while (4 <= length) {
|
17
|
+
k = *(uint32_t*)data;
|
18
|
+
k *= m;
|
19
|
+
k ^= k >> r;
|
20
|
+
k *= m;
|
21
21
|
|
22
|
-
|
23
|
-
|
22
|
+
h *= m;
|
23
|
+
h ^= k;
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
data += 4;
|
26
|
+
length -= 4;
|
27
|
+
}
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
switch (length) {
|
30
|
+
case 3: h ^= data[2] << 16;
|
31
|
+
case 2: h ^= data[1] << 8;
|
32
|
+
case 1: h ^= data[0];
|
33
|
+
h *= m;
|
34
|
+
}
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
h ^= h >> 13;
|
37
|
+
h *= m;
|
38
|
+
h ^= h >> 15;
|
39
39
|
|
40
|
-
|
40
|
+
return h;
|
41
41
|
}
|
42
42
|
|
43
43
|
VALUE
|
44
44
|
murmur2_finish(VALUE self)
|
45
45
|
{
|
46
|
-
|
47
|
-
|
46
|
+
uint8_t digest[4];
|
47
|
+
uint64_t h;
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
}
|
53
|
-
|
54
|
-
VALUE
|
55
|
-
murmur2_to_i(VALUE self)
|
56
|
-
{
|
57
|
-
return ULONG2NUM(_murmur_finish32(self, murmur_hash_process2));
|
49
|
+
h = _murmur_finish32(self, murmur_hash_process2);
|
50
|
+
assign_by_endian_32(digest, h);
|
51
|
+
return rb_str_new((const char*) digest, 4);
|
58
52
|
}
|
59
53
|
|
60
54
|
VALUE
|
61
55
|
murmur2_s_digest(int argc, VALUE *argv, VALUE klass)
|
62
56
|
{
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
}
|
69
|
-
|
70
|
-
VALUE
|
71
|
-
murmur2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
72
|
-
{
|
73
|
-
return hexencode_str_new(murmur2_s_digest(argc, argv, klass));
|
57
|
+
uint8_t digest[4];
|
58
|
+
uint32_t h;
|
59
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process2);
|
60
|
+
assign_by_endian_32(digest, h);
|
61
|
+
return rb_str_new((const char*) digest, 4);
|
74
62
|
}
|
75
63
|
|
76
64
|
VALUE
|
77
65
|
murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
78
66
|
{
|
79
|
-
|
67
|
+
return ULONG2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process2));
|
80
68
|
}
|
81
|
-
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur2_finish(VALUE self);
|
7
|
-
VALUE murmur2_to_i(VALUE self);
|
8
7
|
VALUE murmur2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH2_INCLUDED */
|
@@ -9,82 +9,61 @@
|
|
9
9
|
static uint32_t
|
10
10
|
murmur_hash_process2a(const char *key, uint32_t length, uint32_t seed)
|
11
11
|
{
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
}
|
43
|
-
|
44
|
-
static uint32_t
|
45
|
-
_murmur2a_finish(VALUE self)
|
46
|
-
{
|
47
|
-
const char *seed = RSTRING_PTR(murmur_seed_get32(self));
|
48
|
-
MURMURHASH(self, ptr);
|
49
|
-
return murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer, *(uint32_t*)seed);
|
12
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
13
|
+
const uint8_t r = 24;
|
14
|
+
uint32_t h, k, t, l;
|
15
|
+
const unsigned char *data = (const unsigned char *) key;
|
16
|
+
|
17
|
+
l = length;
|
18
|
+
h = seed;
|
19
|
+
|
20
|
+
while (4 <= length) {
|
21
|
+
k = *(uint32_t*)data;
|
22
|
+
mmix(h,k);
|
23
|
+
data += 4;
|
24
|
+
length -= 4;
|
25
|
+
}
|
26
|
+
|
27
|
+
t = 0;
|
28
|
+
switch (length) {
|
29
|
+
case 3: t ^= data[2] << 16;
|
30
|
+
case 2: t ^= data[1] << 8;
|
31
|
+
case 1: t ^= data[0];
|
32
|
+
}
|
33
|
+
|
34
|
+
mmix(h,t);
|
35
|
+
mmix(h,l);
|
36
|
+
|
37
|
+
h ^= h >> 13;
|
38
|
+
h *= m;
|
39
|
+
h ^= h >> 15;
|
40
|
+
|
41
|
+
return h;
|
50
42
|
}
|
51
43
|
|
52
44
|
VALUE
|
53
45
|
murmur2a_finish(VALUE self)
|
54
46
|
{
|
55
|
-
|
56
|
-
|
47
|
+
uint8_t digest[4];
|
48
|
+
uint64_t h;
|
57
49
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
62
|
-
|
63
|
-
VALUE
|
64
|
-
murmur2a_to_i(VALUE self)
|
65
|
-
{
|
66
|
-
return ULONG2NUM(_murmur2a_finish(self));
|
50
|
+
h = _murmur_finish32(self, murmur_hash_process2a);
|
51
|
+
assign_by_endian_32(digest, h);
|
52
|
+
return rb_str_new((const char*) digest, 4);
|
67
53
|
}
|
68
54
|
|
69
55
|
VALUE
|
70
56
|
murmur2a_s_digest(int argc, VALUE *argv, VALUE klass)
|
71
57
|
{
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
}
|
78
|
-
|
79
|
-
VALUE
|
80
|
-
murmur2a_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
81
|
-
{
|
82
|
-
return hexencode_str_new(murmur2a_s_digest(argc, argv, klass));
|
58
|
+
uint8_t digest[4];
|
59
|
+
uint32_t h;
|
60
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process2a);
|
61
|
+
assign_by_endian_32(digest, h);
|
62
|
+
return rb_str_new((const char*) digest, 4);
|
83
63
|
}
|
84
64
|
|
85
65
|
VALUE
|
86
66
|
murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
87
67
|
{
|
88
|
-
|
68
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process2a));
|
89
69
|
}
|
90
|
-
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur2a_finish(VALUE self);
|
7
|
-
VALUE murmur2a_to_i(VALUE self);
|
8
7
|
VALUE murmur2a_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur2a_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH2A_INCLUDED */
|
@@ -7,81 +7,69 @@
|
|
7
7
|
static uint64_t
|
8
8
|
murmur_hash_process64a(const char *key, uint32_t len, uint64_t seed)
|
9
9
|
{
|
10
|
-
|
11
|
-
|
10
|
+
const uint64_t m = MURMURHASH_MAGIC64A;
|
11
|
+
const int r = 47;
|
12
12
|
|
13
|
-
|
13
|
+
uint64_t h = seed ^ (len * m);
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
const uint64_t * data = (const uint64_t *)key;
|
16
|
+
const uint64_t * end = data + (len/8);
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
while(data != end)
|
19
|
+
{
|
20
|
+
uint64_t k = *data++;
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
k *= m;
|
23
|
+
k ^= k >> r;
|
24
|
+
k *= m;
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
h ^= k;
|
27
|
+
h *= m;
|
28
|
+
}
|
29
29
|
|
30
|
-
|
30
|
+
const unsigned char * data2 = (const unsigned char*)data;
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
switch(len & 7) {
|
33
|
+
case 7: h ^= (uint64_t)((uint64_t)data2[6] << (uint64_t)48);
|
34
|
+
case 6: h ^= (uint64_t)((uint64_t)data2[5] << (uint64_t)40);
|
35
|
+
case 5: h ^= (uint64_t)((uint64_t)data2[4] << (uint64_t)32);
|
36
|
+
case 4: h ^= (uint64_t)((uint64_t)data2[3] << (uint64_t)24);
|
37
|
+
case 3: h ^= (uint64_t)((uint64_t)data2[2] << (uint64_t)16);
|
38
|
+
case 2: h ^= (uint64_t)((uint64_t)data2[1] << (uint64_t)8 );
|
39
|
+
case 1: h ^= (uint64_t)((uint64_t)data2[0] );
|
40
|
+
h *= m;
|
41
|
+
};
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
h ^= h >> r;
|
44
|
+
h *= m;
|
45
|
+
h ^= h >> r;
|
46
46
|
|
47
|
-
|
47
|
+
return h;
|
48
48
|
}
|
49
49
|
|
50
50
|
VALUE
|
51
51
|
murmur64a_finish(VALUE self)
|
52
52
|
{
|
53
|
-
|
54
|
-
|
53
|
+
uint8_t digest[8];
|
54
|
+
uint64_t h;
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
60
|
-
|
61
|
-
VALUE
|
62
|
-
murmur64a_to_i(VALUE self)
|
63
|
-
{
|
64
|
-
return ULL2NUM(_murmur_finish64(self, murmur_hash_process64a));
|
56
|
+
h = _murmur_finish64(self, murmur_hash_process64a);
|
57
|
+
assign_by_endian_64(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 8);
|
65
59
|
}
|
66
60
|
|
67
61
|
VALUE
|
68
62
|
murmur64a_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
63
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
murmur64a_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
-
{
|
80
|
-
return hexencode_str_new(murmur64a_s_digest(argc, argv, klass));
|
64
|
+
uint8_t digest[8];
|
65
|
+
uint64_t h;
|
66
|
+
h = _murmur_s_digest64(argc, argv, klass, murmur_hash_process64a);
|
67
|
+
assign_by_endian_64(digest, h);
|
68
|
+
return rb_str_new((const char*) digest, 8);
|
81
69
|
}
|
82
70
|
|
83
71
|
VALUE
|
84
72
|
murmur64a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
73
|
{
|
86
|
-
|
74
|
+
return ULL2NUM(_murmur_s_digest64(argc, argv, klass, murmur_hash_process64a));
|
87
75
|
}
|