jmhodges-murmur_hash 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +8 -0
- data/README.txt +68 -0
- data/Rakefile +84 -0
- data/ext/murmur/extconf.rb +6 -0
- data/ext/murmur/murmur_hash.cpp +429 -0
- data/lib/murmur_hash.rb +4 -0
- data/test/test_murmur_hash.rb +57 -0
- metadata +84 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
= murmur_hash
|
2
|
+
|
3
|
+
* http://github.com/jmhodges/murmur_hash
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
* A ruby extension for the MurmurHash2 functions. See http://murmurhash.googlepages.com/
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
* Provides all 3 versions of the 32-bit MurmurHash functions which
|
12
|
+
are:
|
13
|
+
|
14
|
+
* `MurmurHash.murmur_hash`, the simplest and fastest version of
|
15
|
+
MurmurHash2. Note: output depends on the endianness of the machine
|
16
|
+
it is run on.
|
17
|
+
* `MurmurHash.neutral_murmur_hash`, a version of the
|
18
|
+
straightforward MurmurHash2 function that does not depend on the
|
19
|
+
endianness of the machine it is run on. Slightly slower.
|
20
|
+
* `MurmurHash.aligned_murmur_hash`, a version of the MurmurHash2
|
21
|
+
function that does only aligned memory reads. It is slightly safer
|
22
|
+
on some uncommon platforms.
|
23
|
+
|
24
|
+
* Provides the 64-bit MurmurHash function. When on a 32-bit machine,
|
25
|
+
it will use the backward compatible version.
|
26
|
+
|
27
|
+
== SYNOPSIS:
|
28
|
+
# 23 can be any unsigned 32-bit integer (i.e. from 0 to 2**32 - 1)
|
29
|
+
hash_number = MurmurHash.murmur_hash("somestring", 23)
|
30
|
+
|
31
|
+
h = MurmurHash.neutral_murmur_hash("s", 23)
|
32
|
+
h = MurmurHash.aligned_murmur_hash("s", 23)
|
33
|
+
|
34
|
+
# 23 still must be an unsigned 32-bit integer
|
35
|
+
h = MurmurHash.murmur_hash64("s", 23)
|
36
|
+
|
37
|
+
== REQUIREMENTS:
|
38
|
+
|
39
|
+
* rake-compiler for compilation
|
40
|
+
|
41
|
+
== INSTALL:
|
42
|
+
|
43
|
+
* sudo gem install murmur_hash
|
44
|
+
|
45
|
+
== LICENSE:
|
46
|
+
|
47
|
+
(The MIT License)
|
48
|
+
|
49
|
+
Copyright (c) 2009 Jeff Hodges
|
50
|
+
|
51
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
52
|
+
a copy of this software and associated documentation files (the
|
53
|
+
'Software'), to deal in the Software without restriction, including
|
54
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
55
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
56
|
+
permit persons to whom the Software is furnished to do so, subject to
|
57
|
+
the following conditions:
|
58
|
+
|
59
|
+
The above copyright notice and this permission notice shall be
|
60
|
+
included in all copies or substantial portions of the Software.
|
61
|
+
|
62
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
63
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
64
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
65
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
66
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
67
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
68
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'rubygems'
|
3
|
+
require 'rake'
|
4
|
+
|
5
|
+
# Defines the extension-compilation task and then the Hoe tasks.
#
# lib/murmur_hash.rb is loaded first. If that fails only because the compiled
# `murmur` extension is missing, a hint is printed and task definition
# continues; any other LoadError is re-raised unchanged.
def define_rake_stuff
  require 'hoe'
  begin
    require './lib/murmur_hash.rb'
  rescue LoadError => e
    murmur_bundle = File.expand_path(File.dirname(__FILE__) + '/lib/murmur')
    # The path must be matched literally: a checkout directory may contain
    # regexp metacharacters (e.g. "c++"), so escape it before interpolating.
    raise e unless e.message =~ /#{Regexp.escape(murmur_bundle)}(\.bundle)?$/

    STDERR.puts "Looks like you have not run a successful `rake compile`, yet. Do that next!"
  end
  Rake::ExtensionTask.new('murmur')
  define_hoe_tasks
end
|
21
|
+
|
22
|
+
# Builds the Hoe project definition for the gem and returns it.
#
# The version comes from MurmurHash::VERSION when the library has been
# loaded, otherwise it falls back to '0'. As a side effect the :test task is
# made to depend on :compile.
def define_hoe_tasks
  version = defined?(MurmurHash) ? MurmurHash::VERSION : '0'

  hoe = Hoe.new('murmur_hash', version) do |project|
    # project.rubyforge_name = 'murmur_hash' # if different than lowercase project name
    project.developer('Jeff Hodges', 'jeff@somethingsimilar.com')
    project.extra_deps << ['rake-compiler', '~>0.5']
  end

  Rake::Task[:test].prerequisites << :compile
  hoe
end
|
37
|
+
|
38
|
+
# Define the real tasks only when rake-compiler (and everything
# define_rake_stuff requires) can be loaded; otherwise point at `rake setup`.
begin
  require 'rake/extensiontask'
  define_rake_stuff
rescue LoadError
  STDERR.puts "Be sure to run 'sudo rake setup' because you don't have all the gems installed"
end
|
46
|
+
|
47
|
+
# Putting this out here cuts off a great deal of time.
|
48
|
+
@gems = Gem::SourceIndex.from_installed_gems
|
49
|
+
|
50
|
+
# Installs the gem +name+ unless an installed gem already satisfies
# +version_requirements+.
#
# name                 - gem name as a String.
# version_requirements - anything Gem::Dependency.new accepts as a
#                        requirement (e.g. "~> 1.12" or a Gem::Requirement).
#
# Looks the dependency up in the @gems index. Returns nil when the gem is
# already present, otherwise the result of the installer.
def conditionally_install_gem(name, version_requirements)
  # Build the dependency from the arguments; previously 'hoe' was hard-coded
  # here, so every other requested gem was silently ignored.
  dependency = Gem::Dependency.new(name, version_requirements)
  return unless @gems.search(dependency).empty?

  Gem::DependencyInstaller.new.install(dependency)
end
|
58
|
+
|
59
|
+
task :default => :compile
|
60
|
+
|
61
|
+
# Taken liberally from http://blog.labnotes.org/2008/02/28/svn-checkout-rake-setup/
desc "If you're building from source, run this task first to setup the necessary dependencies."
task :setup do
  # RubyGems pieces needed by conditionally_install_gem.
  %w[rubygems rubygems/dependency_installer rubygems/requirement].each do |lib|
    require lib
  end

  puts "Checking for gems that need to be installed."
  # Hoe has to be present before the gem spec can be built.
  conditionally_install_gem('hoe', '~>1.12')

  require 'hoe'
  hoe = define_hoe_tasks

  # Install every dependency declared in the gem's spec.
  hoe.spec.dependencies.each do |dependency|
    conditionally_install_gem(dependency.name, dependency.version_requirements)
  end

  puts "\nAnd done."
end
|
83
|
+
|
84
|
+
# vim: syntax=Ruby
|
@@ -0,0 +1,429 @@
|
|
1
|
+
//-----------------------------------------------------------------------------
|
2
|
+
// MurmurHash2, by Austin Appleby
|
3
|
+
|
4
|
+
// Note - This code makes a few assumptions about how your machine behaves -
|
5
|
+
|
6
|
+
// 1. We can read a 4-byte value from any address without crashing
|
7
|
+
// 2. sizeof(int) == 4
|
8
|
+
|
9
|
+
// And it has a few limitations -
|
10
|
+
|
11
|
+
// 1. It will not work incrementally.
|
12
|
+
// 2. It will not produce the same results on little-endian and big-endian
|
13
|
+
// machines.
|
14
|
+
#include <stdio.h>
|
15
|
+
#include <stdint.h>
|
16
|
+
#include "ruby.h"
|
17
|
+
#include "limits.h"
|
18
|
+
|
19
|
+
// Computes the 32-bit MurmurHash2 of `len` bytes starting at `key`,
// seeded with `seed`. Input is consumed as native 4-byte words, so the
// result depends on the machine's byte order and the reads may be unaligned.
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
    // Mixing constants chosen offline by the algorithm's author.
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    const unsigned char * cursor = (const unsigned char *)key;

    // Start from the seed combined with the length.
    unsigned int h = seed ^ len;

    // Body: fold in one 4-byte word per iteration.
    for(; len >= 4; len -= 4, cursor += 4)
    {
        unsigned int k = *(unsigned int *)cursor;

        k *= m;
        k ^= k >> r;
        k *= m;

        h = (h * m) ^ k;
    }

    // Tail: fold in the remaining 0-3 bytes, highest byte first.
    if(len == 3) h ^= cursor[2] << 16;
    if(len >= 2) h ^= cursor[1] << 8;
    if(len >= 1)
    {
        h ^= cursor[0];
        h *= m;
    }

    // Final avalanche so the last bytes affect every output bit.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
|
69
|
+
|
70
|
+
//-----------------------------------------------------------------------------
|
71
|
+
// MurmurHashAligned2, by Austin Appleby
|
72
|
+
|
73
|
+
// Same algorithm as MurmurHash2, but only does aligned reads - should be safer
|
74
|
+
// on certain platforms.
|
75
|
+
|
76
|
+
// Performance will be lower than MurmurHash2
|
77
|
+
|
78
|
+
// One MurmurHash2 mixing round: scrambles word k and folds it into h.
// Expects a shift constant named `r` to be in scope at the expansion site.
#define MURMUR_MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

// Computes the same hash as MurmurHash2 for `len` bytes at `key` with `seed`,
// but every 4-byte load is issued from a 4-byte-aligned address. When `key`
// is misaligned, whole aligned words are read and each logical input word is
// stitched together from two neighbouring loads using shifts.
unsigned int MurmurHashAligned2 ( const void * key, int len, unsigned int seed )
{
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    const unsigned char * data = (const unsigned char *)key;

    unsigned int h = seed ^ len;

    // Offset of `data` inside its 4-byte word. Go through uintptr_t: casting
    // a pointer straight to int truncates (and is rejected by C++ compilers)
    // where pointers are wider than int.
    int align = (int)((uintptr_t)data & 3);

    if(align && (len >= 4))
    {
        // Pre-load the temp registers with the bytes before the first
        // aligned word boundary.

        unsigned int t = 0, d = 0;

        switch(align)
        {
            case 1: t |= data[2] << 16;
            case 2: t |= data[1] << 8;
            case 3: t |= data[0];
        }

        t <<= (8 * align);

        data += 4-align;
        len -= 4-align;

        // Shift amounts used to splice two aligned loads into one input word.
        int sl = 8 * (4-align);
        int sr = 8 * align;

        // Mix

        while(len >= 4)
        {
            d = *(unsigned int *)data;
            t = (t >> sr) | (d << sl);

            unsigned int k = t;

            MURMUR_MIX(h,k,m);

            t = d;

            data += 4;
            len -= 4;
        }

        // Handle leftover data in temp registers

        d = 0;

        if(len >= align)
        {
            // Enough bytes remain to complete one more full input word.
            switch(align)
            {
                case 3: d |= data[2] << 16;
                case 2: d |= data[1] << 8;
                case 1: d |= data[0];
            }

            unsigned int k = (t >> sr) | (d << sl);
            MURMUR_MIX(h,k,m);

            data += align;
            len -= align;

            //----------
            // Handle tail bytes

            switch(len)
            {
                case 3: h ^= data[2] << 16;
                case 2: h ^= data[1] << 8;
                case 1: h ^= data[0];
                        h *= m;
            };
        }
        else
        {
            // The carried-over bytes plus the remaining ones form the tail.
            switch(len)
            {
                case 3: d |= data[2] << 16;
                case 2: d |= data[1] << 8;
                case 1: d |= data[0];
                case 0: h ^= (t >> sr) | (d << sl);
                        h *= m;
            }
        }

        h ^= h >> 13;
        h *= m;
        h ^= h >> 15;

        return h;
    }
    else
    {
        // Already aligned (or fewer than 4 bytes): plain MurmurHash2 path.
        while(len >= 4)
        {
            unsigned int k = *(unsigned int *)data;

            MURMUR_MIX(h,k,m);

            data += 4;
            len -= 4;
        }

        //----------
        // Handle tail bytes

        switch(len)
        {
            case 3: h ^= data[2] << 16;
            case 2: h ^= data[1] << 8;
            case 1: h ^= data[0];
                    h *= m;
        };

        h ^= h >> 13;
        h *= m;
        h ^= h >> 15;

        return h;
    }
}
|
207
|
+
|
208
|
+
//-----------------------------------------------------------------------------
|
209
|
+
// MurmurHashNeutral2, by Austin Appleby
|
210
|
+
|
211
|
+
// Same as MurmurHash2, but endian- and alignment-neutral.
|
212
|
+
// Half the speed though, alas.
|
213
|
+
|
214
|
+
// Computes the 32-bit MurmurHash2 of `len` bytes at `key` with `seed`,
// assembling every input word byte by byte (lowest byte first). The result
// therefore does not depend on host byte order or on the alignment of `key`.
unsigned int MurmurHashNeutral2 ( const void * key, int len, unsigned int seed )
{
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    const unsigned char * bytes = (const unsigned char *)key;
    unsigned int h = seed ^ len;

    for(; len >= 4; len -= 4, bytes += 4)
    {
        // Build the word explicitly in little-endian order.
        unsigned int k = bytes[0];
        k |= bytes[1] << 8;
        k |= bytes[2] << 16;
        k |= bytes[3] << 24;

        k *= m;
        k ^= k >> r;
        k *= m;

        h = (h * m) ^ k;
    }

    // Remaining 0-3 bytes.
    if(len == 3) h ^= bytes[2] << 16;
    if(len >= 2) h ^= bytes[1] << 8;
    if(len >= 1)
    {
        h ^= bytes[0];
        h *= m;
    }

    // Final avalanche.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
|
257
|
+
|
258
|
+
//-----------------------------------------------------------------------------
|
259
|
+
// MurmurHash2, 64-bit versions, by Austin Appleby
|
260
|
+
|
261
|
+
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment
|
262
|
+
// and endian-ness issues if used across multiple platforms.
|
263
|
+
|
264
|
+
// 64-bit hash for 64-bit platforms
|
265
|
+
|
266
|
+
// 64-bit MurmurHash2 variant that consumes the input as native 8-byte words.
// Hashes `len` bytes at `key` with `seed`; the result depends on host byte
// order and the word reads may be unaligned.
uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed )
{
    const uint64_t m = 0xc6a4a7935bd1e995LL;
    const int r = 47;

    uint64_t h = seed ^ (len * m);

    const uint64_t * word = (const uint64_t *)key;
    const uint64_t * const stop = word + (len/8);

    // Body: one 8-byte word per iteration.
    for(; word != stop; ++word)
    {
        uint64_t k = *word;

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    // Tail: the 0-7 bytes after the last full word, highest byte first.
    const unsigned char * rest = (const unsigned char*)word;
    const int remaining = len & 7;

    for(int i = remaining - 1; i >= 0; --i)
    {
        h ^= uint64_t(rest[i]) << (8 * i);
    }
    if(remaining != 0)
    {
        h *= m;
    }

    // Final avalanche.
    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}
|
308
|
+
|
309
|
+
|
310
|
+
// 64-bit hash for 32-bit platforms
|
311
|
+
|
312
|
+
// 64-bit hash built from two interleaved 32-bit MurmurHash2 lanes (h1, h2),
// using only 32-bit arithmetic until the final combine. Hashes `len` bytes at
// `key` with `seed`; input is read as native 4-byte words.
uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed )
{
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    unsigned int h1 = seed ^ len;
    unsigned int h2 = 0;

    const unsigned int * words = (const unsigned int *)key;

    // Body: 8 bytes per iteration, first word into h1, second into h2.
    for(; len >= 8; len -= 8)
    {
        unsigned int k1 = *words++;
        k1 *= m;
        k1 ^= k1 >> r;
        k1 *= m;
        h1 *= m;
        h1 ^= k1;

        unsigned int k2 = *words++;
        k2 *= m;
        k2 ^= k2 >> r;
        k2 *= m;
        h2 *= m;
        h2 ^= k2;
    }

    // One leftover full word goes to h1.
    if(len >= 4)
    {
        unsigned int k1 = *words++;
        k1 *= m;
        k1 ^= k1 >> r;
        k1 *= m;
        h1 *= m;
        h1 ^= k1;
        len -= 4;
    }

    // Remaining 0-3 bytes go to h2.
    const unsigned char * rest = (const unsigned char *)words;

    if(len == 3) h2 ^= rest[2] << 16;
    if(len >= 2) h2 ^= rest[1] << 8;
    if(len >= 1)
    {
        h2 ^= rest[0];
        h2 *= m;
    }

    // Cross-mix the two lanes.
    h1 ^= h2 >> 18;
    h1 *= m;
    h2 ^= h1 >> 22;
    h2 *= m;
    h1 ^= h2 >> 17;
    h1 *= m;
    h2 ^= h1 >> 19;
    h2 *= m;

    // h1 becomes the high 32 bits, h2 the low 32 bits.
    return (uint64_t(h1) << 32) | h2;
}
|
362
|
+
|
363
|
+
VALUE MurmurHashModule = Qnil;
|
364
|
+
|
365
|
+
// Bridges a Ruby call to one of the 32-bit hash functions.
//
// func - hash implementation to invoke.
// key  - Ruby object to hash; converted with StringValue, so non-strings
//        without to_str raise TypeError instead of being read as raw memory.
// seed - Ruby Integer; converted with NUM2UINT so Bignum seeds (which occur
//        for large values on 32-bit builds) are handled, unlike FIX2UINT.
//
// Returns the hash as a Ruby Integer. Raises ArgumentError when the string
// is longer than the `int` length the hash functions accept.
VALUE call_murmur_func
(unsigned int (*func)(const void*, int, unsigned int), VALUE key, VALUE seed) {
    StringValue(key);

    // Use the accessor macros; the RString struct layout is not public API
    // on newer Ruby versions.
    long raw_length = RSTRING_LEN(key);
    if (raw_length > INT_MAX) {
        rb_raise(rb_eArgError, "key is too long to hash");
    }

    int key_length = (int)raw_length;
    char *key_string = RSTRING_PTR(key);
    unsigned int seedling = NUM2UINT(seed);

    unsigned int hash_value = func(key_string, key_length, seedling);

    return UINT2NUM(hash_value);
}
|
375
|
+
|
376
|
+
// Bridges a Ruby call to one of the 64-bit hash functions.
//
// func - hash implementation to invoke.
// key  - Ruby object to hash; converted with StringValue, so non-strings
//        without to_str raise TypeError instead of being read as raw memory.
// seed - Ruby Integer; converted with NUM2UINT so Bignum seeds (which occur
//        for large values on 32-bit builds) are handled, unlike FIX2UINT.
//
// Returns the hash as a Ruby Integer. Raises ArgumentError when the string
// is longer than the `int` length the hash functions accept.
VALUE call_murmur64_func
(uint64_t (*func)(const void*, int, unsigned int), VALUE key, VALUE seed) {
    StringValue(key);

    // Use the accessor macros; the RString struct layout is not public API
    // on newer Ruby versions.
    long raw_length = RSTRING_LEN(key);
    if (raw_length > INT_MAX) {
        rb_raise(rb_eArgError, "key is too long to hash");
    }

    int key_length = (int)raw_length;
    char *key_string = RSTRING_PTR(key);
    unsigned int seedling = NUM2UINT(seed);

    uint64_t hash_value = func(key_string, key_length, seedling);

    return ULL2NUM(hash_value);
}
|
386
|
+
|
387
|
+
|
388
|
+
// MurmurHash.murmur_hash(key, seed): hashes with MurmurHash2.
VALUE method_murmur_hash(VALUE self, VALUE key, VALUE seed) {
    (void)self; // module function: the receiver is not used
    VALUE hashed = call_murmur_func(MurmurHash2, key, seed);
    return hashed;
}
|
391
|
+
|
392
|
+
// MurmurHash.aligned_murmur_hash(key, seed): hashes with MurmurHashAligned2.
VALUE method_aligned_murmur_hash(VALUE self, VALUE key, VALUE seed) {
    (void)self; // module function: the receiver is not used
    VALUE hashed = call_murmur_func(MurmurHashAligned2, key, seed);
    return hashed;
}
|
395
|
+
|
396
|
+
// MurmurHash.neutral_murmur_hash(key, seed): hashes with MurmurHashNeutral2.
VALUE method_neutral_murmur_hash(VALUE self, VALUE key, VALUE seed) {
    (void)self; // module function: the receiver is not used
    VALUE hashed = call_murmur_func(MurmurHashNeutral2, key, seed);
    return hashed;
}
|
399
|
+
|
400
|
+
// MurmurHash.murmur_hash64(key, seed): 64-bit hash. The implementation is
// picked at compile time: MurmurHash64A when _LP64 is defined, otherwise
// MurmurHash64B.
VALUE method_murmur_hash64(VALUE self, VALUE key, VALUE seed) {
    (void)self; // module function: the receiver is not used
#if defined(_LP64)
    /* we're on a 64-bit machine so act like it */
    uint64_t (*hash_impl)(const void*, int, unsigned int) = MurmurHash64A;
#else
    uint64_t (*hash_impl)(const void*, int, unsigned int) = MurmurHash64B;
#endif
    return call_murmur64_func(hash_impl, key, seed);
}
|
408
|
+
|
409
|
+
extern "C" void Init_murmur() {
|
410
|
+
MurmurHashModule = rb_define_module("MurmurHash");
|
411
|
+
|
412
|
+
rb_define_module_function(MurmurHashModule,
|
413
|
+
"murmur_hash",
|
414
|
+
(VALUE(*)(...))&method_murmur_hash,
|
415
|
+
2);
|
416
|
+
|
417
|
+
rb_define_module_function(MurmurHashModule,
|
418
|
+
"aligned_murmur_hash",
|
419
|
+
(VALUE(*)(...))&method_aligned_murmur_hash,
|
420
|
+
2);
|
421
|
+
rb_define_module_function(MurmurHashModule,
|
422
|
+
"neutral_murmur_hash",
|
423
|
+
(VALUE(*)(...))&method_neutral_murmur_hash,
|
424
|
+
2);
|
425
|
+
rb_define_module_function(MurmurHashModule,
|
426
|
+
"murmur_hash64",
|
427
|
+
(VALUE(*)(...))&method_murmur_hash64,
|
428
|
+
2);
|
429
|
+
}
|
data/lib/murmur_hash.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require "test/unit"
|
2
|
+
require "murmur_hash"
|
3
|
+
|
4
|
+
|
5
|
+
# I'm sure there's a simpler way to do this, but I can't remember it
|
6
|
+
# sooo. From:
|
7
|
+
# http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/256730
|
8
|
+
# Also posted at: http://gist.github.com/55220
|
9
|
+
# Detects the byte order of the machine running the interpreter.
module Endian
  class << self
    # True when the host stores integers most-significant byte first.
    def big?
      endianness == :big
    end

    # True when the host stores integers least-significant byte first.
    def little?
      endianness == :little
    end

    # Returns :little or :big by comparing the native 32-bit packing of a
    # probe value with its explicit little- and big-endian packings.
    # Returns nil if the native packing matches neither.
    def endianness
      probe = Array(0xdeadbeef)
      native = probe.pack("L*")

      return :big if native == probe.pack("N*")
      return :little if native == probe.pack("V*")

      nil
    end
  end
end
|
29
|
+
|
30
|
+
# Checks each hash function against a known value for the key "string" with
# seed 23. Expected values for the endian-dependent functions are only known
# for little-endian hosts; on other hosts a placeholder string is used as the
# expected value.
class TestMurmurHash < Test::Unit::TestCase
  def test_neutral_does_the_right_thing
    actual = MurmurHash.neutral_murmur_hash("string", 23)
    assert_equal(
      3435905073,
      actual,
      "neutral_murmur_hash returns the same on boxes of both endianesses"
    )
  end

  def test_normal_does_the_right_thing
    answer = if Endian.little? then 3435905073 else "FIXME find a big-endian box" end
    actual = MurmurHash.murmur_hash("string", 23)
    assert_equal(
      answer,
      actual,
      "murmur_hash returns #{answer} on a #{Endian.endianness}-endian box, which this box is"
    )
  end

  def test_aligned_does_the_right_thing
    answer = if Endian.little? then 3435905073 else "FIXME find a big-endian box" end
    actual = MurmurHash.aligned_murmur_hash("string", 23)
    assert_equal(
      answer,
      actual,
      "aligned_murmur_hash returns #{answer} on a #{Endian.endianness}-endian box, which this box is"
    )
  end

  def test_64bit_does_the_right_thing
    answer = if Endian.little? then 12403831550545192078 else "FIXME find a big-endian box" end
    actual = MurmurHash.murmur_hash64("string", 23)
    assert_equal(
      answer,
      actual,
      "64-bit murmur_hash returns #{answer} on a #{Endian.endianness}-endian box, which this box is"
    )
  end
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jmhodges-murmur_hash
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jeff Hodges
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-05-03 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rake-compiler
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ~>
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0.5"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: hoe
|
27
|
+
type: :development
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.12.2
|
34
|
+
version:
|
35
|
+
description: "* A ruby extension for the MurmurHash2 functions. See http://murmurhash.googlepages.com/"
|
36
|
+
email:
|
37
|
+
- jeff@somethingsimilar.com
|
38
|
+
executables: []
|
39
|
+
|
40
|
+
extensions: []
|
41
|
+
|
42
|
+
extra_rdoc_files:
|
43
|
+
- History.txt
|
44
|
+
- Manifest.txt
|
45
|
+
- README.txt
|
46
|
+
files:
|
47
|
+
- History.txt
|
48
|
+
- Manifest.txt
|
49
|
+
- README.txt
|
50
|
+
- Rakefile
|
51
|
+
- lib/murmur_hash.rb
|
52
|
+
- test/test_murmur_hash.rb
|
53
|
+
- ext/murmur/extconf.rb
|
54
|
+
- ext/murmur/murmur_hash.cpp
|
55
|
+
has_rdoc: true
|
56
|
+
homepage: http://github.com/jmhodges/murmur_hash
|
57
|
+
post_install_message:
|
58
|
+
rdoc_options:
|
59
|
+
- --main
|
60
|
+
- README.txt
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
- ext
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project: murmur_hash
|
79
|
+
rubygems_version: 1.2.0
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: "* A ruby extension for the MurmurHash2 functions"
|
83
|
+
test_files:
|
84
|
+
- test/test_murmur_hash.rb
|