digest-murmurhash 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/README.md +66 -26
  3. data/Rakefile +2 -2
  4. data/digest-murmurhash.gemspec +6 -7
  5. data/ext/digest/murmurhash/{murmurhash1.c → 1.c} +1 -1
  6. data/ext/digest/murmurhash/{murmurhash2.c → 2.c} +1 -1
  7. data/ext/digest/murmurhash/{murmurhash2a.c → 2a.c} +1 -1
  8. data/ext/digest/murmurhash/3_x64_128.c +117 -0
  9. data/ext/digest/murmurhash/3_x86_128.c +141 -0
  10. data/ext/digest/murmurhash/3_x86_32.c +88 -0
  11. data/ext/digest/murmurhash/{murmurhash64a.c → 64a.c} +1 -1
  12. data/ext/digest/murmurhash/{murmurhash64b.c → 64b.c} +2 -2
  13. data/ext/digest/murmurhash/aligned2.c +163 -0
  14. data/ext/digest/murmurhash/extconf.rb +1 -1
  15. data/ext/digest/murmurhash/{murmurhash.c → init.c} +136 -22
  16. data/ext/digest/murmurhash/init.h +94 -0
  17. data/ext/digest/murmurhash/{murmurhash_neutral2.c → neutral2.c} +3 -3
  18. data/lib/digest/murmurhash.rb +3 -73
  19. data/lib/digest/murmurhash/base.rb +58 -0
  20. data/spec/bench.rb +18 -12
  21. data/spec/digest_spec.rb +41 -29
  22. data/spec/exception_spec.rb +17 -12
  23. data/spec/mem_spec.rb +1 -1
  24. data/spec/spec_helper.rb +6 -8
  25. metadata +20 -36
  26. data/ext/digest/murmurhash/murmurhash.h +0 -46
  27. data/ext/digest/murmurhash/murmurhash1.h +0 -11
  28. data/ext/digest/murmurhash/murmurhash2.h +0 -11
  29. data/ext/digest/murmurhash/murmurhash2a.h +0 -11
  30. data/ext/digest/murmurhash/murmurhash64a.h +0 -10
  31. data/ext/digest/murmurhash/murmurhash64b.h +0 -11
  32. data/ext/digest/murmurhash/murmurhash_aligned2.c +0 -75
  33. data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -11
  34. data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -11
@@ -0,0 +1,88 @@
1
+ /*
2
+ * MurmurHash3_x86_32 (C) Austin Appleby
3
+ */
4
+
5
+ #include "init.h"
6
+
7
/*
 * MurmurHash3_x86_32 over the `len` bytes starting at `key`, seeded with
 * `seed`.
 *
 * The input is consumed as len/4 32-bit blocks fetched through getblock32(),
 * followed by a tail of len%4 bytes. The running state is then XORed with
 * `len` and passed through fmix32(), whose result is returned.
 */
uint32_t
murmur_hash_process3_x86_32(const char * key, uint32_t len, uint32_t seed)
{
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;

  const uint8_t * bytes = (const uint8_t*)key;
  const int block_count = len / 4;

  /* The tail starts right after the last whole block; the same address,
   * viewed as 32-bit words, is the end of the block array. */
  const uint8_t * tail = (const uint8_t*)(bytes + block_count*4);
  const uint32_t * block_end = (const uint32_t *)(bytes + block_count*4);

  const uint32_t rest = len & 3;
  uint32_t hash = seed;
  uint32_t last = 0;
  int idx = -block_count;

  /* body: walk the blocks with a negative index counting up to zero */
  while (idx != 0) {
    uint32_t word = getblock32(block_end, idx);

    word *= c1;
    word = ROTL32(word, 15);
    word *= c2;

    hash ^= word;
    hash = ROTL32(hash, 13);
    hash = hash*5+0xe6546b64;

    idx++;
  }

  /* tail: fold the remaining 1..3 bytes, highest offset first */
  if (rest == 3) {
    last ^= tail[2] << 16;
  }
  if (rest >= 2) {
    last ^= tail[1] << 8;
  }
  if (rest >= 1) {
    last ^= tail[0];
    last *= c1;
    last = ROTL32(last, 15);
    last *= c2;
    hash ^= last;
  }

  /* finalization */
  hash ^= len;

  return fmix32(hash);
}
61
+
62
+ VALUE
63
+ murmur3_x86_32_finish(VALUE self)
64
+ {
65
+ uint8_t digest[4];
66
+ uint32_t h;
67
+
68
+ h = _murmur_finish32(self, murmur_hash_process3_x86_32);
69
+ assign_by_endian_32(digest, h);
70
+ return rb_str_new((const char*) digest, 4);
71
+ }
72
+
73
+ VALUE
74
+ murmur3_x86_32_s_digest(int argc, VALUE *argv, VALUE klass)
75
+ {
76
+ uint8_t digest[4];
77
+ uint32_t h;
78
+
79
+ h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process3_x86_32);
80
+ assign_by_endian_32(digest, h);
81
+ return rb_str_new((const char*) digest, 4);
82
+ }
83
+
84
+ VALUE
85
+ murmur3_x86_32_s_rawdigest(int argc, VALUE *argv, VALUE klass)
86
+ {
87
+ return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process3_x86_32));
88
+ }
@@ -2,7 +2,7 @@
2
2
  * MurmurHash64A (C) Austin Appleby
3
3
  */
4
4
 
5
- #include "murmurhash64a.h"
5
+ #include "init.h"
6
6
 
7
7
  static uint64_t
8
8
  murmur_hash_process64a(const char *key, uint32_t len, uint64_t seed)
@@ -2,7 +2,7 @@
2
2
  * MurmurHash64B (C) Austin Appleby
3
3
  */
4
4
 
5
- #include "murmurhash64b.h"
5
+ #include "init.h"
6
6
 
7
7
  uint64_t
8
8
  murmur_hash_process64b(const char * key, uint32_t len, uint64_t seed)
@@ -46,7 +46,7 @@ murmur_hash_process64b(const char * key, uint32_t len, uint64_t seed)
46
46
  h1 ^= h2 >> 17; h1 *= m;
47
47
  h2 ^= h1 >> 19; h2 *= m;
48
48
 
49
- uint64_t h = h1;
49
+ uint64_t h = (uint32_t)h1;
50
50
 
51
51
  h = (h << 32) | h2;
52
52
 
@@ -0,0 +1,163 @@
1
+ /*
2
+ * MurmurHashAligned2 (C) Austin Appleby
3
+ */
4
+
5
+ #include "init.h"
6
+
7
/*
 * One MurmurHash2 mixing round. Folds the 32-bit word `k` into the running
 * hash `h` using multiplier `m`. Relies on a variable `r` (the shift amount)
 * being in scope at the expansion site. Wrapped in do/while(0) so that it
 * behaves as a single statement.
 */
#define MIX(h,k,m) do { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; } while (0)

/*
 * MurmurHashAligned2 over the `len` bytes starting at `key`, seeded with
 * `seed`.
 *
 * When `key` is not 4-byte aligned and at least 4 bytes long, the leading
 * bytes up to the next 4-byte boundary are loaded individually and every
 * following 32-bit word is read from an aligned address; each mixed word is
 * stitched together from the previous and the current load with shifts.
 * Otherwise the input is read word by word directly from `key`.
 *
 * Words are loaded in host byte order, so the result depends on the host's
 * endianness.
 */
uint32_t
murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
{
  const uint32_t m = 0x5bd1e995;
  const int r = 24;

  const unsigned char * data = (const unsigned char *)key;

  uint32_t h = seed ^ len;

  /* uintptr_t is the integer type meant for pointer round-trips; it has the
   * right width on both 32-bit and 64-bit targets. */
  int align = (int)((uintptr_t)data & 3);

  if(align && (len >= 4))
  {
    // Pre-load the temp registers with the 4-align bytes in front of the
    // next aligned address

    uint32_t t = 0, d = 0;

    switch(align)
    {
      case 1: t |= data[2] << 16; /* fallthrough */
      case 2: t |= data[1] << 8;  /* fallthrough */
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    data += 4-align;
    len -= 4-align;

    int sl = 8 * (4-align);
    int sr = 8 * align;

    // Mix: combine the bytes carried over in t with the low bytes of the
    // freshly loaded aligned word d

    while(len >= 4)
    {
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      uint32_t k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    // Handle leftover data in temp registers

    d = 0;

    if(len >= align)
    {
      // Enough bytes remain to complete one more full word

      switch(align)
      {
        case 3: d |= data[2] << 16; /* fallthrough */
        case 2: d |= data[1] << 8;  /* fallthrough */
        case 1: d |= data[0];
      }

      uint32_t k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      //----------
      // Handle tail bytes

      switch(len)
      {
        case 3: h ^= data[2] << 16; /* fallthrough */
        case 2: h ^= data[1] << 8;  /* fallthrough */
        case 1: h ^= data[0];
                h *= m;
      }
    }
    else
    {
      // The carried-over bytes plus the remaining input form the tail

      switch(len)
      {
        case 3: d |= data[2] << 16; /* fallthrough */
        case 2: d |= data[1] << 8;  /* fallthrough */
        case 1: d |= data[0];       /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl);
                h *= m;
      }
    }
  }
  else
  {
    while(len >= 4)
    {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    //----------
    // Handle tail bytes

    switch(len)
    {
      case 3: h ^= data[2] << 16; /* fallthrough */
      case 2: h ^= data[1] << 8;  /* fallthrough */
      case 1: h ^= data[0];
              h *= m;
    }
  }

  // Final avalanche, shared by both paths

  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
137
+
138
+ VALUE
139
+ murmur_aligned2_finish(VALUE self)
140
+ {
141
+ uint8_t digest[4];
142
+ uint32_t h;
143
+
144
+ h = _murmur_finish32(self, murmur_hash_process_aligned2);
145
+ assign_by_endian_32(digest, h);
146
+ return rb_str_new((const char*) digest, 4);
147
+ }
148
+
149
+ VALUE
150
+ murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
151
+ {
152
+ uint8_t digest[4];
153
+ uint64_t h;
154
+ h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
155
+ assign_by_endian_32(digest, h);
156
+ return rb_str_new((const char*) digest, 4);
157
+ }
158
+
159
+ VALUE
160
+ murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
161
+ {
162
+ return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2));
163
+ }
@@ -1,3 +1,3 @@
1
1
  require 'mkmf'
2
2
 
3
- create_makefile('digest/murmurhash/murmurhash')
3
+ create_makefile('digest/murmurhash/ext')
@@ -1,25 +1,53 @@
1
- #include "murmurhash.h"
2
- #include "murmurhash1.h"
3
- #include "murmurhash2.h"
4
- #include "murmurhash2a.h"
5
- #include "murmurhash64a.h"
6
- #include "murmurhash64b.h"
7
- #include "murmurhash_neutral2.h"
8
- #include "murmurhash_aligned2.h"
9
-
10
- VALUE cDigest_MurmurHash1,
11
- cDigest_MurmurHash2,
12
- cDigest_MurmurHash2A,
13
- cDigest_MurmurHash64A,
14
- cDigest_MurmurHash64B,
15
- cDigest_MurmurHashNeutral2,
16
- cDigest_MurmurHashAligned2;
1
+ #include "init.h"
2
+
17
3
  ID id_DEFAULT_SEED;
18
4
  ID iv_seed;
19
5
  ID iv_buffer;
20
6
 
7
+
8
/*
 * Rotate the 32-bit value `x` left by `r` bits.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition is
 * only an inline definition and provides no external symbol, which callers
 * in other translation units need. The compiler may still inline calls made
 * within this file.
 *
 * The rotation count is reduced modulo 32 so that r == 0 (or any other
 * multiple of 32) does not shift by the full width, which would be undefined
 * behavior. For r in 1..31 the result is unchanged.
 */
uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned int left = (unsigned int)r & 31u;
  const unsigned int right = (32u - left) & 31u;

  return (x << left) | (x >> right);
}
12
/*
 * Rotate the 64-bit value `x` left by `r` bits.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition is
 * only an inline definition and provides no external symbol, which callers
 * in other translation units need. The compiler may still inline calls made
 * within this file.
 *
 * The rotation count is reduced modulo 64 so that r == 0 (or any other
 * multiple of 64) does not shift by the full width, which would be undefined
 * behavior. For r in 1..63 the result is unchanged.
 */
uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned int left = (unsigned int)r & 63u;
  const unsigned int right = (64u - left) & 63u;

  return (x << left) | (x >> right);
}
16
+
17
+ FORCE_INLINE uint32_t getblock32 ( const uint32_t * p, int i )
18
+ {
19
+ return p[i];
20
+ }
21
+
22
+ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
23
+ {
24
+ return p[i];
25
+ }
26
+
27
+ FORCE_INLINE uint32_t fmix32 ( uint32_t h )
28
+ {
29
+ h ^= h >> 16;
30
+ h *= 0x85ebca6b;
31
+ h ^= h >> 13;
32
+ h *= 0xc2b2ae35;
33
+ h ^= h >> 16;
34
+
35
+ return h;
36
+ }
37
+
38
+ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
39
+ {
40
+ k ^= k >> 33;
41
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
42
+ k ^= k >> 33;
43
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
44
+ k ^= k >> 33;
45
+
46
+ return k;
47
+ }
48
+
21
49
  void
22
- assign_by_endian_32(uint8_t *digest, uint64_t h)
50
+ assign_by_endian_32(uint8_t *digest, uint32_t h)
23
51
  {
24
52
  if (BIGENDIAN_P()) {
25
53
  digest[0] = h >> 24;
@@ -60,8 +88,32 @@ assign_by_endian_64(uint8_t *digest, uint64_t h)
60
88
  }
61
89
  }
62
90
 
91
/*
 * Serialize the four 32-bit words stored at `out` into the 16-byte buffer
 * `digest`.
 *
 * When BIGENDIAN_P() is true, word i is written to digest[4*i .. 4*i+3]
 * with its most significant byte first. Otherwise word i is written to
 * digest[12-4*i .. 15-4*i] with its least significant byte first, so the
 * words appear in reverse order.
 */
void
assign_by_endian_128(uint8_t *digest, void *out)
{
  uint32_t *words = (uint32_t*)out;
  uint8_t *dst;
  int i;

  if (BIGENDIAN_P()) {
    for (i = 0; i < 4; i++) {
      dst = digest + (i*4);
      dst[0] = words[i] >> 24;
      dst[1] = words[i] >> 16;
      dst[2] = words[i] >> 8;
      dst[3] = words[i];
    }
    return;
  }

  for (i = 0; i < 4; i++) {
    /* start of the 4-byte slot counted from the end of the digest */
    dst = digest + 16 - (i*4) - 4;
    dst[3] = words[i] >> 24;
    dst[2] = words[i] >> 16;
    dst[1] = words[i] >> 8;
    dst[0] = words[i];
  }
}
113
+
114
+
63
115
  uint32_t
64
- _murmur_finish32(VALUE self, uint32_t (*process)(const char *, uint32_t, uint32_t))
116
+ _murmur_finish32(VALUE self, uint32_t (*process)(const char*, uint32_t, uint32_t))
65
117
  {
66
118
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
67
119
  VALUE buffer = rb_ivar_get(self, iv_buffer);
@@ -69,13 +121,21 @@ _murmur_finish32(VALUE self, uint32_t (*process)(const char *, uint32_t, uint32_
69
121
  }
70
122
 
71
123
  uint64_t
72
- _murmur_finish64(VALUE self, uint64_t (*process)(const char *, uint32_t, uint64_t))
124
+ _murmur_finish64(VALUE self, uint64_t (*process)(const char*, uint32_t, uint64_t))
73
125
  {
74
126
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
75
127
  VALUE buffer = rb_ivar_get(self, iv_buffer);
76
128
  return process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint64_t*)seed);
77
129
  }
78
130
 
131
+ void
132
+ _murmur_finish128(VALUE self, void *out, void (*process)(const char*, uint32_t, uint32_t, void*))
133
+ {
134
+ const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
135
+ VALUE buffer = rb_ivar_get(self, iv_buffer);
136
+ process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint32_t*)seed, out);
137
+ }
138
+
79
139
  uint32_t
80
140
  _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const char *, uint32_t, uint32_t))
81
141
  {
@@ -92,7 +152,7 @@ _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const
92
152
  if (1 < argc) {
93
153
  StringValue(argv[1]);
94
154
  if (RSTRING_LEN(argv[1]) != 4) {
95
- rb_raise(rb_eArgError, "seed string should 32 bit chars");
155
+ rb_raise(rb_eArgError, "seed string should be 4 length");
96
156
  }
97
157
  seed = RSTRING_PTR(argv[1]);
98
158
  } else {
@@ -118,7 +178,7 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
118
178
  if (1 < argc) {
119
179
  StringValue(argv[1]);
120
180
  if (RSTRING_LEN(argv[1]) != 8) {
121
- rb_raise(rb_eArgError, "seed string should 64 bit chars");
181
+ rb_raise(rb_eArgError, "seed string should be 8 length");
122
182
  }
123
183
  seed = RSTRING_PTR(argv[1]);
124
184
  } else {
@@ -129,8 +189,47 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
129
189
  }
130
190
 
131
191
  void
132
- Init_murmurhash(void)
192
+ _murmur_s_digest128(int argc, VALUE *argv, VALUE klass, void *out, void (*process)(const char *, uint32_t, uint32_t, void *))
133
193
  {
194
+ VALUE str;
195
+ const char *seed;
196
+ int seed_length = 4;
197
+
198
+ if (argc < 1)
199
+ rb_raise(rb_eArgError, "no data given");
200
+
201
+ str = *argv;
202
+
203
+ StringValue(str);
204
+
205
+ if (1 < argc) {
206
+ StringValue(argv[1]);
207
+ if (RSTRING_LEN(argv[1]) != seed_length) {
208
+ rb_raise(rb_eArgError, "seed string should be %d length", seed_length);
209
+ }
210
+ seed = RSTRING_PTR(argv[1]);
211
+ } else {
212
+ seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
213
+ }
214
+
215
+ process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint32_t*)seed, out);
216
+ }
217
+
218
+
219
+ void
220
+ Init_ext(void)
221
+ {
222
+ VALUE cDigest_MurmurHash1,
223
+ cDigest_MurmurHash2,
224
+ cDigest_MurmurHash2A,
225
+ cDigest_MurmurHash64A,
226
+ cDigest_MurmurHash64B,
227
+ cDigest_MurmurHashNeutral2,
228
+ cDigest_MurmurHashAligned2,
229
+ cDigest_MurmurHash3_x86_32,
230
+ cDigest_MurmurHash3_x86_128,
231
+ cDigest_MurmurHash3_x64_128;
232
+
134
233
  id_DEFAULT_SEED = rb_intern("DEFAULT_SEED");
135
234
  iv_seed = rb_intern("@seed");
136
235
  iv_buffer = rb_intern("@buffer");
@@ -169,4 +268,19 @@ Init_murmurhash(void)
169
268
  rb_define_singleton_method(cDigest_MurmurHashAligned2, "digest", murmur_aligned2_s_digest, -1);
170
269
  rb_define_singleton_method(cDigest_MurmurHashAligned2, "rawdigest", murmur_aligned2_s_rawdigest, -1);
171
270
  rb_define_private_method(cDigest_MurmurHashAligned2, "finish", murmur_aligned2_finish, 0);
271
+
272
+ cDigest_MurmurHash3_x86_32 = rb_path2class("Digest::MurmurHash3_x86_32");
273
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_32, "digest", murmur3_x86_32_s_digest, -1);
274
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_32, "rawdigest", murmur3_x86_32_s_rawdigest, -1);
275
+ rb_define_private_method(cDigest_MurmurHash3_x86_32, "finish", murmur3_x86_32_finish, 0);
276
+
277
+ cDigest_MurmurHash3_x86_128 = rb_path2class("Digest::MurmurHash3_x86_128");
278
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_128, "digest", murmur3_x86_128_s_digest, -1);
279
+ rb_define_singleton_method(cDigest_MurmurHash3_x86_128, "rawdigest", murmur3_x86_128_s_rawdigest, -1);
280
+ rb_define_private_method(cDigest_MurmurHash3_x86_128, "finish", murmur3_x86_128_finish, 0);
281
+
282
+ cDigest_MurmurHash3_x64_128 = rb_path2class("Digest::MurmurHash3_x64_128");
283
+ rb_define_singleton_method(cDigest_MurmurHash3_x64_128, "digest", murmur3_x64_128_s_digest, -1);
284
+ rb_define_singleton_method(cDigest_MurmurHash3_x64_128, "rawdigest", murmur3_x64_128_s_rawdigest, -1);
285
+ rb_define_private_method(cDigest_MurmurHash3_x64_128, "finish", murmur3_x64_128_finish, 0);
172
286
  }