digest-murmurhash 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +66 -26
  3. data/Rakefile +2 -2
  4. data/digest-murmurhash.gemspec +6 -7
  5. data/ext/digest/murmurhash/{murmurhash1.c → 1.c} +1 -1
  6. data/ext/digest/murmurhash/{murmurhash2.c → 2.c} +1 -1
  7. data/ext/digest/murmurhash/{murmurhash2a.c → 2a.c} +1 -1
  8. data/ext/digest/murmurhash/3_x64_128.c +117 -0
  9. data/ext/digest/murmurhash/3_x86_128.c +141 -0
  10. data/ext/digest/murmurhash/3_x86_32.c +88 -0
  11. data/ext/digest/murmurhash/{murmurhash64a.c → 64a.c} +1 -1
  12. data/ext/digest/murmurhash/{murmurhash64b.c → 64b.c} +2 -2
  13. data/ext/digest/murmurhash/aligned2.c +163 -0
  14. data/ext/digest/murmurhash/extconf.rb +1 -1
  15. data/ext/digest/murmurhash/{murmurhash.c → init.c} +136 -22
  16. data/ext/digest/murmurhash/init.h +94 -0
  17. data/ext/digest/murmurhash/{murmurhash_neutral2.c → neutral2.c} +3 -3
  18. data/lib/digest/murmurhash.rb +3 -73
  19. data/lib/digest/murmurhash/base.rb +58 -0
  20. data/spec/bench.rb +18 -12
  21. data/spec/digest_spec.rb +41 -29
  22. data/spec/exception_spec.rb +17 -12
  23. data/spec/mem_spec.rb +1 -1
  24. data/spec/spec_helper.rb +6 -8
  25. metadata +20 -36
  26. data/ext/digest/murmurhash/murmurhash.h +0 -46
  27. data/ext/digest/murmurhash/murmurhash1.h +0 -11
  28. data/ext/digest/murmurhash/murmurhash2.h +0 -11
  29. data/ext/digest/murmurhash/murmurhash2a.h +0 -11
  30. data/ext/digest/murmurhash/murmurhash64a.h +0 -10
  31. data/ext/digest/murmurhash/murmurhash64b.h +0 -11
  32. data/ext/digest/murmurhash/murmurhash_aligned2.c +0 -75
  33. data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -11
  34. data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -11
@@ -0,0 +1,88 @@
1
+ /*
2
+ * MurmurHash3_x86_32 (C) Austin Appleby
3
+ */
4
+
5
+ #include "init.h"
6
+
7
/*
 * Computes the 32-bit MurmurHash3 (x86 variant) of `len` bytes at `key`,
 * starting from `seed`.
 *
 * The input is consumed as whole 32-bit blocks (read through getblock32),
 * then the 1-3 trailing bytes, and the result is passed through fmix32
 * after mixing in the length.
 */
uint32_t
murmur_hash_process3_x86_32(const char * key, uint32_t len, uint32_t seed)
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * bytes = (const uint8_t*)key;
  const int block_count = len / 4;
  const uint32_t * words = (const uint32_t *)bytes;
  const uint8_t * tail = bytes + block_count*4;
  const uint32_t rest = len & 3;

  uint32_t hash = seed;
  int idx;

  /* body: one mixing round per complete 4-byte block, front to back */
  for(idx = 0; idx < block_count; idx++)
  {
    uint32_t k = getblock32(words,idx);

    k *= c1;
    k = ROTL32(k,15);
    k *= c2;

    hash ^= k;
    hash = ROTL32(hash,13);
    hash = hash*5+0xe6546b64;
  }

  /* tail: fold the remaining 1-3 bytes (if any) into one partial block */
  if(rest != 0)
  {
    uint32_t k = 0;

    if(rest == 3)
      k ^= tail[2] << 16;
    if(rest >= 2)
      k ^= tail[1] << 8;
    k ^= tail[0];

    k *= c1;
    k = ROTL32(k,15);
    k *= c2;
    hash ^= k;
  }

  /* finalization */
  hash ^= len;

  return fmix32(hash);
}
61
+
62
+ VALUE
63
+ murmur3_x86_32_finish(VALUE self)
64
+ {
65
+ uint8_t digest[4];
66
+ uint32_t h;
67
+
68
+ h = _murmur_finish32(self, murmur_hash_process3_x86_32);
69
+ assign_by_endian_32(digest, h);
70
+ return rb_str_new((const char*) digest, 4);
71
+ }
72
+
73
+ VALUE
74
+ murmur3_x86_32_s_digest(int argc, VALUE *argv, VALUE klass)
75
+ {
76
+ uint8_t digest[4];
77
+ uint32_t h;
78
+
79
+ h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process3_x86_32);
80
+ assign_by_endian_32(digest, h);
81
+ return rb_str_new((const char*) digest, 4);
82
+ }
83
+
84
+ VALUE
85
+ murmur3_x86_32_s_rawdigest(int argc, VALUE *argv, VALUE klass)
86
+ {
87
+ return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process3_x86_32));
88
+ }
@@ -2,7 +2,7 @@
2
2
  * MurmurHash64A (C) Austin Appleby
3
3
  */
4
4
 
5
- #include "murmurhash64a.h"
5
+ #include "init.h"
6
6
 
7
7
  static uint64_t
8
8
  murmur_hash_process64a(const char *key, uint32_t len, uint64_t seed)
@@ -2,7 +2,7 @@
2
2
  * MurmurHash64B (C) Austin Appleby
3
3
  */
4
4
 
5
- #include "murmurhash64b.h"
5
+ #include "init.h"
6
6
 
7
7
  uint64_t
8
8
  murmur_hash_process64b(const char * key, uint32_t len, uint64_t seed)
@@ -46,7 +46,7 @@ murmur_hash_process64b(const char * key, uint32_t len, uint64_t seed)
46
46
  h1 ^= h2 >> 17; h1 *= m;
47
47
  h2 ^= h1 >> 19; h2 *= m;
48
48
 
49
- uint64_t h = h1;
49
+ uint64_t h = (uint32_t)h1;
50
50
 
51
51
  h = (h << 32) | h2;
52
52
 
@@ -0,0 +1,163 @@
1
+ /*
2
+ * MurmurHashAligned2 (C) Austin Appleby
3
+ */
4
+
5
+ #include "init.h"
6
+
7
/*
 * MIX folds one 32-bit block `k` into the running hash `h` using the
 * multiplier `m`. It modifies both `h` and `k` and also reads the shift
 * amount `r` from the enclosing scope, so every argument must be a plain
 * lvalue. Wrapped in do/while(0) so that `MIX(...);` is a single statement.
 */
#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)

/*
 * MurmurHashAligned2: hashes `len` bytes at `key` with `seed`, performing
 * 32-bit loads only from 4-byte-aligned addresses.
 *
 * When `key` is misaligned (and at least 4 bytes long) the leading bytes are
 * gathered one at a time into `t`; each following aligned word `d` is then
 * spliced with the bytes still pending in `t` to rebuild the block that an
 * unaligned read at the original position would have produced.
 * NOTE(review): the splice uses native byte order; it matches a plain
 * unaligned read on little-endian hosts -- confirm before relying on
 * alignment-independent results on big-endian targets.
 *
 * Returns the 32-bit hash.
 */
uint32_t
murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed ^ len;

  /* uintptr_t is the integer type meant for pointer round-trips; the low two
   * bits give the distance past the previous 4-byte boundary. */
  const int align = (int)((uintptr_t)data & 3);

  if(align && (len >= 4))
  {
    /* Pre-load the temp registers with the 4-align bytes that precede the
     * first aligned word. */
    uint32_t t = 0, d = 0;

    switch(align)
    {
    case 1: t |= data[2] << 16; /* fallthrough */
    case 2: t |= data[1] << 8;  /* fallthrough */
    case 3: t |= data[0];
    }

    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    const int sl = 8 * (4-align);
    const int sr = 8 * align;

    /* Mix: data is now 4-byte aligned, so whole-word loads are safe. */
    while(len >= 4)
    {
      d = *(const uint32_t *)data;
      t = (t >> sr) | (d << sl);

      uint32_t k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */
    d = 0;

    if(len >= (uint32_t)align)
    {
      /* Enough bytes remain to complete the pending block. */
      switch(align)
      {
      case 3: d |= data[2] << 16; /* fallthrough */
      case 2: d |= data[1] << 8;  /* fallthrough */
      case 1: d |= data[0];
      }

      uint32_t k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* Handle tail bytes */
      switch(len)
      {
      case 3: h ^= data[2] << 16; /* fallthrough */
      case 2: h ^= data[1] << 8;  /* fallthrough */
      case 1: h ^= data[0];
              h *= m;
      }
    }
    else
    {
      /* The pending bytes plus what is left form the (short) tail. */
      switch(len)
      {
      case 3: d |= data[2] << 16; /* fallthrough */
      case 2: d |= data[1] << 8;  /* fallthrough */
      case 1: d |= data[0];       /* fallthrough */
      case 0: h ^= (t >> sr) | (d << sl);
              h *= m;
      }
    }
  }
  else
  {
    /* Already aligned, or shorter than one block: straightforward path. */
    while(len >= 4)
    {
      uint32_t k = *(const uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* Handle tail bytes */
    switch(len)
    {
    case 3: h ^= data[2] << 16; /* fallthrough */
    case 2: h ^= data[1] << 8;  /* fallthrough */
    case 1: h ^= data[0];
            h *= m;
    }
  }

  /* Final avalanche, shared by both paths. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
107
+ else
108
+ {
109
+ while(len >= 4)
110
+ {
111
+ uint32_t k = *(uint32_t *)data;
112
+
113
+ MIX(h,k,m);
114
+
115
+ data += 4;
116
+ len -= 4;
117
+ }
118
+
119
+ //----------
120
+ // Handle tail bytes
121
+
122
+ switch(len)
123
+ {
124
+ case 3: h ^= data[2] << 16;
125
+ case 2: h ^= data[1] << 8;
126
+ case 1: h ^= data[0];
127
+ h *= m;
128
+ };
129
+
130
+ h ^= h >> 13;
131
+ h *= m;
132
+ h ^= h >> 15;
133
+
134
+ return h;
135
+ }
136
+ }
137
+
138
+ VALUE
139
+ murmur_aligned2_finish(VALUE self)
140
+ {
141
+ uint8_t digest[4];
142
+ uint32_t h;
143
+
144
+ h = _murmur_finish32(self, murmur_hash_process_aligned2);
145
+ assign_by_endian_32(digest, h);
146
+ return rb_str_new((const char*) digest, 4);
147
+ }
148
+
149
+ VALUE
150
+ murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
151
+ {
152
+ uint8_t digest[4];
153
+ uint64_t h;
154
+ h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
155
+ assign_by_endian_32(digest, h);
156
+ return rb_str_new((const char*) digest, 4);
157
+ }
158
+
159
+ VALUE
160
+ murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
161
+ {
162
+ return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2));
163
+ }
@@ -1,3 +1,3 @@
1
1
  require 'mkmf'
2
2
 
3
- create_makefile('digest/murmurhash/murmurhash')
3
+ create_makefile('digest/murmurhash/ext')
@@ -1,25 +1,53 @@
1
- #include "murmurhash.h"
2
- #include "murmurhash1.h"
3
- #include "murmurhash2.h"
4
- #include "murmurhash2a.h"
5
- #include "murmurhash64a.h"
6
- #include "murmurhash64b.h"
7
- #include "murmurhash_neutral2.h"
8
- #include "murmurhash_aligned2.h"
9
-
10
- VALUE cDigest_MurmurHash1,
11
- cDigest_MurmurHash2,
12
- cDigest_MurmurHash2A,
13
- cDigest_MurmurHash64A,
14
- cDigest_MurmurHash64B,
15
- cDigest_MurmurHashNeutral2,
16
- cDigest_MurmurHashAligned2;
1
+ #include "init.h"
2
+
17
3
  ID id_DEFAULT_SEED;
18
4
  ID iv_seed;
19
5
  ID iv_buffer;
20
6
 
7
+
8
/*
 * The extern declaration makes this translation unit provide the external
 * definition of rotl32; under C99/C11 rules a plain `inline` definition on
 * its own does not, and a call that is not inlined would fail to link.
 */
extern uint32_t rotl32 ( uint32_t x, int8_t r );

/*
 * Rotates the 32-bit value `x` left by `r` bits and returns the result.
 *
 * The count is reduced modulo 32, so r == 0 (or any multiple of 32) returns
 * `x` unchanged and a negative `r` rotates right, instead of shifting by an
 * out-of-range amount, which is undefined behaviour.
 */
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned int s = (unsigned int)r & 31u;

  return (x << s) | (x >> ((32u - s) & 31u));
}
12
/*
 * The extern declaration makes this translation unit provide the external
 * definition of rotl64; under C99/C11 rules a plain `inline` definition on
 * its own does not, and a call that is not inlined would fail to link.
 */
extern uint64_t rotl64 ( uint64_t x, int8_t r );

/*
 * Rotates the 64-bit value `x` left by `r` bits and returns the result.
 *
 * The count is reduced modulo 64, so r == 0 (or any multiple of 64) returns
 * `x` unchanged and a negative `r` rotates right, instead of shifting by an
 * out-of-range amount, which is undefined behaviour.
 */
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned int s = (unsigned int)r & 63u;

  return (x << s) | (x >> ((64u - s) & 63u));
}
16
+
17
+ FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
18
+ {
19
+ return p[i];
20
+ }
21
+
22
+ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
23
+ {
24
+ return p[i];
25
+ }
26
+
27
+ FORCE_INLINE uint32_t fmix32 ( uint32_t h )
28
+ {
29
+ h ^= h >> 16;
30
+ h *= 0x85ebca6b;
31
+ h ^= h >> 13;
32
+ h *= 0xc2b2ae35;
33
+ h ^= h >> 16;
34
+
35
+ return h;
36
+ }
37
+
38
+ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
39
+ {
40
+ k ^= k >> 33;
41
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
42
+ k ^= k >> 33;
43
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
44
+ k ^= k >> 33;
45
+
46
+ return k;
47
+ }
48
+
21
49
  void
22
- assign_by_endian_32(uint8_t *digest, uint64_t h)
50
+ assign_by_endian_32(uint8_t *digest, uint32_t h)
23
51
  {
24
52
  if (BIGENDIAN_P()) {
25
53
  digest[0] = h >> 24;
@@ -60,8 +88,32 @@ assign_by_endian_64(uint8_t *digest, uint64_t h)
60
88
  }
61
89
  }
62
90
 
91
/*
 * Serialises the four 32-bit words at `out` into the 16-byte `digest`.
 *
 * When BIGENDIAN_P() is true, word i is written to digest[4*i .. 4*i+3],
 * most significant byte first. Otherwise the words are written in reverse
 * order (word 0 ends up in digest[12..15]), each least significant byte
 * first.
 * NOTE(review): assumes `digest` and `out` do not overlap -- confirm
 * against callers.
 */
void
assign_by_endian_128(uint8_t *digest, void *out)
{
  const uint32_t *words = (const uint32_t*)out;
  int w;

  if (!BIGENDIAN_P()) {
    for (w = 0; w < 4; w++) {
      const uint32_t v = words[w];
      uint8_t *dst = digest + 16 - (w*4) - 4;

      dst[0] = v;
      dst[1] = v >> 8;
      dst[2] = v >> 16;
      dst[3] = v >> 24;
    }
    return;
  }

  for (w = 0; w < 4; w++) {
    const uint32_t v = words[w];
    uint8_t *dst = digest + (w*4);

    dst[0] = v >> 24;
    dst[1] = v >> 16;
    dst[2] = v >> 8;
    dst[3] = v;
  }
}
113
+
114
+
63
115
  uint32_t
64
- _murmur_finish32(VALUE self, uint32_t (*process)(const char *, uint32_t, uint32_t))
116
+ _murmur_finish32(VALUE self, uint32_t (*process)(const char*, uint32_t, uint32_t))
65
117
  {
66
118
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
67
119
  VALUE buffer = rb_ivar_get(self, iv_buffer);
@@ -69,13 +121,21 @@ _murmur_finish32(VALUE self, uint32_t (*process)(const char *, uint32_t, uint32_
69
121
  }
70
122
 
71
123
  uint64_t
72
- _murmur_finish64(VALUE self, uint64_t (*process)(const char *, uint32_t, uint64_t))
124
+ _murmur_finish64(VALUE self, uint64_t (*process)(const char*, uint32_t, uint64_t))
73
125
  {
74
126
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
75
127
  VALUE buffer = rb_ivar_get(self, iv_buffer);
76
128
  return process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint64_t*)seed);
77
129
  }
78
130
 
131
+ void
132
+ _murmur_finish128(VALUE self, void *out, void (*process)(const char*, uint32_t, uint32_t, void*))
133
+ {
134
+ const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
135
+ VALUE buffer = rb_ivar_get(self, iv_buffer);
136
+ process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint32_t*)seed, out);
137
+ }
138
+
79
139
  uint32_t
80
140
  _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const char *, uint32_t, uint32_t))
81
141
  {
@@ -92,7 +152,7 @@ _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const
92
152
  if (1 < argc) {
93
153
  StringValue(argv[1]);
94
154
  if (RSTRING_LEN(argv[1]) != 4) {
95
- rb_raise(rb_eArgError, "seed string should 32 bit chars");
155
+ rb_raise(rb_eArgError, "seed string should be 4 length");
96
156
  }
97
157
  seed = RSTRING_PTR(argv[1]);
98
158
  } else {
@@ -118,7 +178,7 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
118
178
  if (1 < argc) {
119
179
  StringValue(argv[1]);
120
180
  if (RSTRING_LEN(argv[1]) != 8) {
121
- rb_raise(rb_eArgError, "seed string should 64 bit chars");
181
+ rb_raise(rb_eArgError, "seed string should be 8 length");
122
182
  }
123
183
  seed = RSTRING_PTR(argv[1]);
124
184
  } else {
@@ -129,8 +189,47 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
129
189
  }
130
190
 
131
191
  void
132
- Init_murmurhash(void)
192
+ _murmur_s_digest128(int argc, VALUE *argv, VALUE klass, void *out, void (*process)(const char *, uint32_t, uint32_t, void *))
133
193
  {
194
+ VALUE str;
195
+ const char *seed;
196
+ int seed_length = 4;
197
+
198
+ if (argc < 1)
199
+ rb_raise(rb_eArgError, "no data given");
200
+
201
+ str = *argv;
202
+
203
+ StringValue(str);
204
+
205
+ if (1 < argc) {
206
+ StringValue(argv[1]);
207
+ if (RSTRING_LEN(argv[1]) != seed_length) {
208
+ rb_raise(rb_eArgError, "seed string should be %d length", seed_length);
209
+ }
210
+ seed = RSTRING_PTR(argv[1]);
211
+ } else {
212
+ seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
213
+ }
214
+
215
+ process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint32_t*)seed, out);
216
+ }
217
+
218
+
219
+ void
220
+ Init_ext(void)
221
+ {
222
+ VALUE cDigest_MurmurHash1,
223
+ cDigest_MurmurHash2,
224
+ cDigest_MurmurHash2A,
225
+ cDigest_MurmurHash64A,
226
+ cDigest_MurmurHash64B,
227
+ cDigest_MurmurHashNeutral2,
228
+ cDigest_MurmurHashAligned2,
229
+ cDigest_MurmurHash3_x86_32,
230
+ cDigest_MurmurHash3_x86_128,
231
+ cDigest_MurmurHash3_x64_128;
232
+
134
233
  id_DEFAULT_SEED = rb_intern("DEFAULT_SEED");
135
234
  iv_seed = rb_intern("@seed");
136
235
  iv_buffer = rb_intern("@buffer");
@@ -169,4 +268,19 @@ Init_murmurhash(void)
169
268
  rb_define_singleton_method(cDigest_MurmurHashAligned2, "digest", murmur_aligned2_s_digest, -1);
170
269
  rb_define_singleton_method(cDigest_MurmurHashAligned2, "rawdigest", murmur_aligned2_s_rawdigest, -1);
171
270
  rb_define_private_method(cDigest_MurmurHashAligned2, "finish", murmur_aligned2_finish, 0);
271
+
272
+ cDigest_MurmurHash3_x86_32 = rb_path2class("Digest::MurmurHash3_x86_32");
273
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_32, "digest", murmur3_x86_32_s_digest, -1);
274
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_32, "rawdigest", murmur3_x86_32_s_rawdigest, -1);
275
+ rb_define_private_method(cDigest_MurmurHash3_x86_32, "finish", murmur3_x86_32_finish, 0);
276
+
277
+ cDigest_MurmurHash3_x86_128 = rb_path2class("Digest::MurmurHash3_x86_128");
278
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_128, "digest", murmur3_x86_128_s_digest, -1);
279
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_128, "rawdigest", murmur3_x86_128_s_rawdigest, -1);
280
+ rb_define_private_method(cDigest_MurmurHash3_x86_128, "finish", murmur3_x86_128_finish, 0);
281
+
282
+ cDigest_MurmurHash3_x64_128 = rb_path2class("Digest::MurmurHash3_x64_128");
283
+ rb_define_singleton_method(cDigest_MurmurHash3_x64_128, "digest", murmur3_x64_128_s_digest, -1);
284
+ rb_define_singleton_method(cDigest_MurmurHash3_x64_128, "rawdigest", murmur3_x64_128_s_rawdigest, -1);
285
+ rb_define_private_method(cDigest_MurmurHash3_x64_128, "finish", murmur3_x64_128_finish, 0);
172
286
  }