digest-murmurhash 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/Rakefile +1 -2
- data/digest-murmurhash.gemspec +5 -8
- data/ext/digest/murmurhash/extconf.rb +0 -3
- data/ext/digest/murmurhash/murmurhash.c +121 -202
- data/ext/digest/murmurhash/murmurhash.h +23 -55
- data/ext/digest/murmurhash/murmurhash1.c +35 -47
- data/ext/digest/murmurhash/murmurhash1.h +0 -2
- data/ext/digest/murmurhash/murmurhash2.c +35 -48
- data/ext/digest/murmurhash/murmurhash2.h +0 -2
- data/ext/digest/murmurhash/murmurhash2a.c +41 -62
- data/ext/digest/murmurhash/murmurhash2a.h +0 -2
- data/ext/digest/murmurhash/murmurhash64a.c +40 -52
- data/ext/digest/murmurhash/murmurhash64a.h +0 -3
- data/ext/digest/murmurhash/murmurhash64b.c +56 -68
- data/ext/digest/murmurhash/murmurhash64b.h +0 -2
- data/ext/digest/murmurhash/murmurhash_aligned2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -2
- data/ext/digest/murmurhash/murmurhash_neutral2.c +39 -51
- data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -2
- data/lib/digest/murmurhash.rb +72 -6
- data/spec/digest_spec.rb +20 -19
- data/spec/exception_spec.rb +4 -4
- data/spec/mem_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -1
- metadata +26 -27
- data/lib/digest/murmurhash/version.rb +0 -5
@@ -5,77 +5,65 @@
|
|
5
5
|
#include "murmurhash1.h"
|
6
6
|
|
7
7
|
/*
 * One MurmurHash1 mixing round: multiply the running hash by the
 * constant, then xor it with itself shifted right by `r` bits.
 *
 * Expands in place and relies on two names from the calling scope:
 *   h - the running hash value (modified)
 *   m - the multiplication constant
 *
 * The shift count is parenthesized so that an expression argument
 * (e.g. `x & 31`) is applied as a whole rather than being re-associated
 * by operator precedence.
 */
#define murmur1(r) do { \
  h *= m; \
  h ^= h >> (r); \
} while(0)
|
11
11
|
|
12
12
|
static uint32_t
|
13
13
|
murmur_hash_process1(const char *data, uint32_t length, uint32_t seed)
|
14
14
|
{
|
15
|
-
|
16
|
-
|
17
|
-
|
15
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
16
|
+
const uint8_t r = 16;
|
17
|
+
uint32_t h;
|
18
18
|
|
19
|
-
|
19
|
+
h = seed ^ (length * m);
|
20
20
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
21
|
+
while (4 <= length) {
|
22
|
+
h += *(uint32_t*)data;
|
23
|
+
murmur1(r);
|
24
|
+
data += 4;
|
25
|
+
length -= 4;
|
26
|
+
}
|
27
27
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
28
|
+
switch (length) {
|
29
|
+
case 3:
|
30
|
+
h += data[2] << 16;
|
31
|
+
case 2:
|
32
|
+
h += data[1] << 8;
|
33
|
+
case 1:
|
34
|
+
h += data[0];
|
35
|
+
murmur1(r);
|
36
|
+
}
|
37
37
|
|
38
|
-
|
39
|
-
|
38
|
+
murmur1(10);
|
39
|
+
murmur1(17);
|
40
40
|
|
41
|
-
|
41
|
+
return h;
|
42
42
|
}
|
43
43
|
|
44
44
|
VALUE
|
45
45
|
murmur1_finish(VALUE self)
|
46
46
|
{
|
47
|
-
|
48
|
-
|
47
|
+
uint8_t digest[4];
|
48
|
+
uint64_t h;
|
49
49
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
}
|
54
|
-
|
55
|
-
VALUE
|
56
|
-
murmur1_to_i(VALUE self)
|
57
|
-
{
|
58
|
-
return UINT2NUM(_murmur_finish32(self, murmur_hash_process1));
|
50
|
+
h = _murmur_finish32(self, murmur_hash_process1);
|
51
|
+
assign_by_endian_32(digest, h);
|
52
|
+
return rb_str_new((const char*) digest, 4);
|
59
53
|
}
|
60
54
|
|
61
55
|
VALUE
|
62
56
|
murmur1_s_digest(int argc, VALUE *argv, VALUE klass)
|
63
57
|
{
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
}
|
70
|
-
|
71
|
-
VALUE
|
72
|
-
murmur1_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
73
|
-
{
|
74
|
-
return hexencode_str_new(murmur1_s_digest(argc, argv, klass));
|
58
|
+
uint8_t digest[4];
|
59
|
+
uint32_t h;
|
60
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process1);
|
61
|
+
assign_by_endian_32(digest, h);
|
62
|
+
return rb_str_new((const char*) digest, 4);
|
75
63
|
}
|
76
64
|
|
77
65
|
VALUE
|
78
66
|
murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
79
67
|
{
|
80
|
-
|
68
|
+
return UINT2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process1));
|
81
69
|
}
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur1_finish(VALUE self);
|
7
|
-
VALUE murmur1_to_i(VALUE self);
|
8
7
|
VALUE murmur1_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur1_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH1_INCLUDED */
|
@@ -7,75 +7,62 @@
|
|
7
7
|
static uint32_t
|
8
8
|
murmur_hash_process2(const char *data, uint32_t length, uint32_t seed)
|
9
9
|
{
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
const uint8_t r = 24;
|
12
|
+
uint32_t h, k;
|
13
13
|
|
14
|
-
|
14
|
+
h = seed ^ length;
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
while (4 <= length) {
|
17
|
+
k = *(uint32_t*)data;
|
18
|
+
k *= m;
|
19
|
+
k ^= k >> r;
|
20
|
+
k *= m;
|
21
21
|
|
22
|
-
|
23
|
-
|
22
|
+
h *= m;
|
23
|
+
h ^= k;
|
24
24
|
|
25
|
-
|
26
|
-
|
27
|
-
|
25
|
+
data += 4;
|
26
|
+
length -= 4;
|
27
|
+
}
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
switch (length) {
|
30
|
+
case 3: h ^= data[2] << 16;
|
31
|
+
case 2: h ^= data[1] << 8;
|
32
|
+
case 1: h ^= data[0];
|
33
|
+
h *= m;
|
34
|
+
}
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
36
|
+
h ^= h >> 13;
|
37
|
+
h *= m;
|
38
|
+
h ^= h >> 15;
|
39
39
|
|
40
|
-
|
40
|
+
return h;
|
41
41
|
}
|
42
42
|
|
43
43
|
VALUE
|
44
44
|
murmur2_finish(VALUE self)
|
45
45
|
{
|
46
|
-
|
47
|
-
|
46
|
+
uint8_t digest[4];
|
47
|
+
uint64_t h;
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
}
|
53
|
-
|
54
|
-
VALUE
|
55
|
-
murmur2_to_i(VALUE self)
|
56
|
-
{
|
57
|
-
return ULONG2NUM(_murmur_finish32(self, murmur_hash_process2));
|
49
|
+
h = _murmur_finish32(self, murmur_hash_process2);
|
50
|
+
assign_by_endian_32(digest, h);
|
51
|
+
return rb_str_new((const char*) digest, 4);
|
58
52
|
}
|
59
53
|
|
60
54
|
VALUE
|
61
55
|
murmur2_s_digest(int argc, VALUE *argv, VALUE klass)
|
62
56
|
{
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
}
|
69
|
-
|
70
|
-
VALUE
|
71
|
-
murmur2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
72
|
-
{
|
73
|
-
return hexencode_str_new(murmur2_s_digest(argc, argv, klass));
|
57
|
+
uint8_t digest[4];
|
58
|
+
uint32_t h;
|
59
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process2);
|
60
|
+
assign_by_endian_32(digest, h);
|
61
|
+
return rb_str_new((const char*) digest, 4);
|
74
62
|
}
|
75
63
|
|
76
64
|
VALUE
|
77
65
|
murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
78
66
|
{
|
79
|
-
|
67
|
+
return ULONG2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process2));
|
80
68
|
}
|
81
|
-
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur2_finish(VALUE self);
|
7
|
-
VALUE murmur2_to_i(VALUE self);
|
8
7
|
VALUE murmur2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH2_INCLUDED */
|
@@ -9,82 +9,61 @@
|
|
9
9
|
static uint32_t
|
10
10
|
murmur_hash_process2a(const char *key, uint32_t length, uint32_t seed)
|
11
11
|
{
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
}
|
43
|
-
|
44
|
-
static uint32_t
|
45
|
-
_murmur2a_finish(VALUE self)
|
46
|
-
{
|
47
|
-
const char *seed = RSTRING_PTR(murmur_seed_get32(self));
|
48
|
-
MURMURHASH(self, ptr);
|
49
|
-
return murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer, *(uint32_t*)seed);
|
12
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
13
|
+
const uint8_t r = 24;
|
14
|
+
uint32_t h, k, t, l;
|
15
|
+
const unsigned char *data = (const unsigned char *) key;
|
16
|
+
|
17
|
+
l = length;
|
18
|
+
h = seed;
|
19
|
+
|
20
|
+
while (4 <= length) {
|
21
|
+
k = *(uint32_t*)data;
|
22
|
+
mmix(h,k);
|
23
|
+
data += 4;
|
24
|
+
length -= 4;
|
25
|
+
}
|
26
|
+
|
27
|
+
t = 0;
|
28
|
+
switch (length) {
|
29
|
+
case 3: t ^= data[2] << 16;
|
30
|
+
case 2: t ^= data[1] << 8;
|
31
|
+
case 1: t ^= data[0];
|
32
|
+
}
|
33
|
+
|
34
|
+
mmix(h,t);
|
35
|
+
mmix(h,l);
|
36
|
+
|
37
|
+
h ^= h >> 13;
|
38
|
+
h *= m;
|
39
|
+
h ^= h >> 15;
|
40
|
+
|
41
|
+
return h;
|
50
42
|
}
|
51
43
|
|
52
44
|
VALUE
|
53
45
|
murmur2a_finish(VALUE self)
|
54
46
|
{
|
55
|
-
|
56
|
-
|
47
|
+
uint8_t digest[4];
|
48
|
+
uint64_t h;
|
57
49
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
}
|
62
|
-
|
63
|
-
VALUE
|
64
|
-
murmur2a_to_i(VALUE self)
|
65
|
-
{
|
66
|
-
return ULONG2NUM(_murmur2a_finish(self));
|
50
|
+
h = _murmur_finish32(self, murmur_hash_process2a);
|
51
|
+
assign_by_endian_32(digest, h);
|
52
|
+
return rb_str_new((const char*) digest, 4);
|
67
53
|
}
|
68
54
|
|
69
55
|
VALUE
|
70
56
|
murmur2a_s_digest(int argc, VALUE *argv, VALUE klass)
|
71
57
|
{
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
}
|
78
|
-
|
79
|
-
VALUE
|
80
|
-
murmur2a_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
81
|
-
{
|
82
|
-
return hexencode_str_new(murmur2a_s_digest(argc, argv, klass));
|
58
|
+
uint8_t digest[4];
|
59
|
+
uint32_t h;
|
60
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process2a);
|
61
|
+
assign_by_endian_32(digest, h);
|
62
|
+
return rb_str_new((const char*) digest, 4);
|
83
63
|
}
|
84
64
|
|
85
65
|
VALUE
|
86
66
|
murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
87
67
|
{
|
88
|
-
|
68
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process2a));
|
89
69
|
}
|
90
|
-
|
@@ -4,9 +4,7 @@
|
|
4
4
|
#include "murmurhash.h"
|
5
5
|
|
6
6
|
VALUE murmur2a_finish(VALUE self);
|
7
|
-
VALUE murmur2a_to_i(VALUE self);
|
8
7
|
VALUE murmur2a_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
-
VALUE murmur2a_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
8
|
VALUE murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
9
|
|
12
10
|
#endif /* ifndef MURMURHASH2A_INCLUDED */
|
@@ -7,81 +7,69 @@
|
|
7
7
|
static uint64_t
|
8
8
|
murmur_hash_process64a(const char *key, uint32_t len, uint64_t seed)
|
9
9
|
{
|
10
|
-
|
11
|
-
|
10
|
+
const uint64_t m = MURMURHASH_MAGIC64A;
|
11
|
+
const int r = 47;
|
12
12
|
|
13
|
-
|
13
|
+
uint64_t h = seed ^ (len * m);
|
14
14
|
|
15
|
-
|
16
|
-
|
15
|
+
const uint64_t * data = (const uint64_t *)key;
|
16
|
+
const uint64_t * end = data + (len/8);
|
17
17
|
|
18
|
-
|
19
|
-
|
20
|
-
|
18
|
+
while(data != end)
|
19
|
+
{
|
20
|
+
uint64_t k = *data++;
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
22
|
+
k *= m;
|
23
|
+
k ^= k >> r;
|
24
|
+
k *= m;
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
h ^= k;
|
27
|
+
h *= m;
|
28
|
+
}
|
29
29
|
|
30
|
-
|
30
|
+
const unsigned char * data2 = (const unsigned char*)data;
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
32
|
+
switch(len & 7) {
|
33
|
+
case 7: h ^= (uint64_t)((uint64_t)data2[6] << (uint64_t)48);
|
34
|
+
case 6: h ^= (uint64_t)((uint64_t)data2[5] << (uint64_t)40);
|
35
|
+
case 5: h ^= (uint64_t)((uint64_t)data2[4] << (uint64_t)32);
|
36
|
+
case 4: h ^= (uint64_t)((uint64_t)data2[3] << (uint64_t)24);
|
37
|
+
case 3: h ^= (uint64_t)((uint64_t)data2[2] << (uint64_t)16);
|
38
|
+
case 2: h ^= (uint64_t)((uint64_t)data2[1] << (uint64_t)8 );
|
39
|
+
case 1: h ^= (uint64_t)((uint64_t)data2[0] );
|
40
|
+
h *= m;
|
41
|
+
};
|
42
42
|
|
43
|
-
|
44
|
-
|
45
|
-
|
43
|
+
h ^= h >> r;
|
44
|
+
h *= m;
|
45
|
+
h ^= h >> r;
|
46
46
|
|
47
|
-
|
47
|
+
return h;
|
48
48
|
}
|
49
49
|
|
50
50
|
VALUE
|
51
51
|
murmur64a_finish(VALUE self)
|
52
52
|
{
|
53
|
-
|
54
|
-
|
53
|
+
uint8_t digest[8];
|
54
|
+
uint64_t h;
|
55
55
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
}
|
60
|
-
|
61
|
-
VALUE
|
62
|
-
murmur64a_to_i(VALUE self)
|
63
|
-
{
|
64
|
-
return ULL2NUM(_murmur_finish64(self, murmur_hash_process64a));
|
56
|
+
h = _murmur_finish64(self, murmur_hash_process64a);
|
57
|
+
assign_by_endian_64(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 8);
|
65
59
|
}
|
66
60
|
|
67
61
|
VALUE
|
68
62
|
murmur64a_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
63
|
{
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
}
|
76
|
-
|
77
|
-
VALUE
|
78
|
-
murmur64a_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
-
{
|
80
|
-
return hexencode_str_new(murmur64a_s_digest(argc, argv, klass));
|
64
|
+
uint8_t digest[8];
|
65
|
+
uint64_t h;
|
66
|
+
h = _murmur_s_digest64(argc, argv, klass, murmur_hash_process64a);
|
67
|
+
assign_by_endian_64(digest, h);
|
68
|
+
return rb_str_new((const char*) digest, 8);
|
81
69
|
}
|
82
70
|
|
83
71
|
VALUE
|
84
72
|
murmur64a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
73
|
{
|
86
|
-
|
74
|
+
return ULL2NUM(_murmur_s_digest64(argc, argv, klass, murmur_hash_process64a));
|
87
75
|
}
|