digest-murmurhash 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +16 -0
- data/Rakefile +11 -1
- data/digest-murmurhash.gemspec +3 -3
- data/ext/digest/murmurhash/extconf.rb +1 -2
- data/ext/digest/murmurhash/murmurhash.c +253 -0
- data/ext/digest/murmurhash/murmurhash.h +52 -35
- data/ext/digest/murmurhash/murmurhash1.c +18 -54
- data/ext/digest/murmurhash/murmurhash2.c +17 -56
- data/ext/digest/murmurhash/murmurhash2a.c +18 -42
- data/ext/digest/murmurhash/murmurhash64a.c +87 -0
- data/ext/digest/murmurhash/murmurhash64a.h +13 -0
- data/ext/digest/murmurhash/murmurhash64b.c +93 -0
- data/ext/digest/murmurhash/murmurhash64b.h +13 -0
- data/ext/digest/murmurhash/murmurhash_aligned2.c +87 -0
- data/ext/digest/murmurhash/murmurhash_aligned2.h +13 -0
- data/ext/digest/murmurhash/murmurhash_neutral2.c +87 -0
- data/ext/digest/murmurhash/murmurhash_neutral2.h +13 -0
- data/lib/digest/murmurhash/version.rb +1 -1
- data/spec/bench.rb +21 -31
- data/spec/digest_spec.rb +54 -34
- data/spec/exception_spec.rb +19 -4
- data/spec/mem_spec.rb +3 -6
- data/spec/spec_helper.rb +21 -0
- metadata +15 -7
- data/ext/digest/murmurhash/init.c +0 -39
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifndef MURMURHASH_ALIGNED2_INCLUDED
|
2
|
+
# define MURMURHASH_ALIGNED2_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur_aligned2_finish(VALUE self);
|
7
|
+
VALUE murmur_aligned2_to_i(VALUE self);
|
8
|
+
VALUE murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
VALUE murmur_aligned2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
|
+
VALUE murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
|
+
|
12
|
+
#endif /* ifndef MURMURHASH_ALIGNED2_INCLUDED */
|
13
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHashNeutral2 (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash_neutral2.h"
|
6
|
+
|
7
|
+
/*
 * Endian- and alignment-neutral variant of the 32-bit MurmurHash2.
 *
 * The input is consumed one byte at a time and each 4-byte group is
 * assembled least-significant byte first, so the result does not depend on
 * the host byte order or on the alignment of `key`.
 *
 * key  - bytes to hash; at least `len` bytes must be readable
 * len  - number of bytes to hash
 * seed - initial hash state
 *
 * Returns the 32-bit hash value.
 */
uint32_t
murmur_hash_process_neutral2(const char * key, uint32_t len, uint32_t seed)
{
    const uint32_t m = 0x5bd1e995;
    const int r = 24;
    const unsigned char *data = (const unsigned char *)key;
    uint32_t h = seed ^ len;

    /* Mix four bytes at a time into the hash. */
    while (len >= 4) {
        /*
         * Widen every byte to uint32_t before shifting: an unsigned char is
         * promoted to (signed) int, and shifting a value >= 0x80 left by 24
         * would overflow that int, which is undefined behaviour.
         */
        uint32_t k = (uint32_t)data[0]
                   | ((uint32_t)data[1] << 8)
                   | ((uint32_t)data[2] << 16)
                   | ((uint32_t)data[3] << 24);

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    /* Fold in the remaining 0-3 tail bytes. */
    switch (len) {
    case 3:
        h ^= (uint32_t)data[2] << 16;
        /* fallthrough */
    case 2:
        h ^= (uint32_t)data[1] << 8;
        /* fallthrough */
    case 1:
        h ^= (uint32_t)data[0];
        h *= m;
        break;
    default:
        break;
    }

    /* Final avalanche so the last few bytes are well mixed. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
|
49
|
+
|
50
|
+
VALUE
|
51
|
+
murmur_neutral2_finish(VALUE self)
|
52
|
+
{
|
53
|
+
uint8_t digest[4];
|
54
|
+
uint32_t h;
|
55
|
+
|
56
|
+
h = _murmur_finish32(self, murmur_hash_process_neutral2);
|
57
|
+
ASSINE_BY_ENDIAN_32(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
59
|
+
}
|
60
|
+
|
61
|
+
VALUE
|
62
|
+
murmur_neutral2_to_i(VALUE self)
|
63
|
+
{
|
64
|
+
return ULL2NUM(_murmur_finish32(self, murmur_hash_process_neutral2));
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE
|
68
|
+
murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
|
+
{
|
70
|
+
uint8_t digest[4];
|
71
|
+
uint64_t h;
|
72
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
|
73
|
+
ASSINE_BY_ENDIAN_32(digest, h);
|
74
|
+
return rb_str_new((const char*) digest, 4);
|
75
|
+
}
|
76
|
+
|
77
|
+
VALUE
|
78
|
+
murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
+
{
|
80
|
+
return hexencode_str_new(murmur_neutral2_s_digest(argc, argv, klass));
|
81
|
+
}
|
82
|
+
|
83
|
+
VALUE
|
84
|
+
murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
|
+
{
|
86
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2));
|
87
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifndef MURMURHASH_NEUTRAL2_INCLUDED
|
2
|
+
# define MURMURHASH_NEUTRAL2_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur_neutral2_finish(VALUE self);
|
7
|
+
VALUE murmur_neutral2_to_i(VALUE self);
|
8
|
+
VALUE murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
VALUE murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
|
+
VALUE murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
|
+
|
12
|
+
#endif /* ifndef MURMURHASH_NEUTRAL2_INCLUDED */
|
13
|
+
|
data/spec/bench.rb
CHANGED
@@ -7,18 +7,13 @@ require 'digest/stringbuffer'
|
|
7
7
|
require 'digest/murmurhash'
|
8
8
|
require 'benchmark'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
buffer.unpack("C*").each do |c|
|
18
|
-
result += (c * @prime)
|
19
|
-
end
|
20
|
-
[result & 0xffffffff].pack("N")
|
21
|
-
end
|
10
|
+
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
11
|
+
# Builds a string of +length+ characters, each drawn with Array#sample from
# the @rands character table defined just above this method.
def rand_str length
  length.times.each_with_object("") do |_, out|
    out << @rands.sample
  end
end
|
23
18
|
|
24
19
|
class Integer
|
@@ -27,12 +22,12 @@ class Integer
|
|
27
22
|
end
|
28
23
|
end
|
29
24
|
|
30
|
-
def murmur_hash str
|
25
|
+
def murmur_hash str, seed
|
31
26
|
data = str.dup.unpack("C*")
|
32
27
|
m = 0x5bd1e995
|
33
28
|
r = 16
|
34
29
|
length = str.bytesize
|
35
|
-
h = (length * m).to_32
|
30
|
+
h = (seed ^ (length * m).to_32).to_32
|
36
31
|
|
37
32
|
while 4 <= length
|
38
33
|
d = data.shift(4).pack("C*").unpack("I")[0]
|
@@ -62,34 +57,29 @@ def murmur_hash str
|
|
62
57
|
h
|
63
58
|
end
|
64
59
|
|
65
|
-
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
66
|
-
def rand_str
|
67
|
-
rand = "";
|
68
|
-
20.times {
|
69
|
-
rand << @rands[rand(62)]
|
70
|
-
}
|
71
|
-
rand
|
72
|
-
end
|
73
|
-
|
74
60
|
n = 100000
|
75
61
|
times_enum = n.times
|
76
62
|
|
77
63
|
a = Array.new(n, 0)
|
78
64
|
n.times do |i|
|
79
|
-
a[i] = rand_str
|
65
|
+
a[i] = rand_str 20
|
80
66
|
end
|
81
|
-
|
67
|
+
seed = rand(2**32)
|
68
|
+
seed_str32 = [seed].pack("L")
|
69
|
+
seed_str64 = [seed].pack("Q")
|
82
70
|
c = Struct.new "Cases",
|
83
71
|
:name,
|
84
72
|
:func
|
85
73
|
cases = [
|
86
|
-
c.new("pureRuby", proc{|x| murmur_hash x }),
|
87
|
-
c.new("
|
88
|
-
c.new("
|
89
|
-
c.new("
|
90
|
-
c.new("
|
74
|
+
c.new("pureRuby", proc{|x| murmur_hash x, seed }),
|
75
|
+
c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x, seed_str32 }),
|
76
|
+
c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x, seed_str32 }),
|
77
|
+
c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x, seed_str32 }),
|
78
|
+
c.new("MurmurHash64A", proc{|x| Digest::MurmurHash64A.rawdigest x, seed_str64 }),
|
79
|
+
c.new("MurmurHash64B", proc{|x| Digest::MurmurHash64B.rawdigest x, seed_str64 }),
|
80
|
+
c.new("MurmurHashNeutral2", proc{|x| Digest::MurmurHashNeutral2.rawdigest x, seed_str32 }),
|
81
|
+
c.new("MurmurHashAligned2", proc{|x| Digest::MurmurHashAligned2.rawdigest x, seed_str32 }),
|
91
82
|
]
|
92
|
-
|
93
83
|
reals = {}
|
94
84
|
confrict = {}
|
95
85
|
confricts = {}
|
data/spec/digest_spec.rb
CHANGED
@@ -1,48 +1,68 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
4
|
-
|
5
|
-
|
3
|
+
describe Digest::MurmurHash do
|
4
|
+
it "seed" do
|
5
|
+
all.each do |c|
|
6
|
+
m = c.new
|
7
|
+
expect(c::DEFAULT_SEED == m.seed).to be true
|
8
|
+
end
|
6
9
|
end
|
7
10
|
|
8
|
-
it "
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
it "seed=" do
|
12
|
+
all.each do |c|
|
13
|
+
m = c.new
|
14
|
+
m.update "murmurhash"
|
15
|
+
before_digest = m.hexdigest
|
16
|
+
m.seed = (c::DEFAULT_SEED.length == 4) ? seed32 : seed64
|
17
|
+
expect(c::DEFAULT_SEED != m.seed).to be true
|
18
|
+
expect(before_digest != m.hexdigest).to be true
|
19
|
+
end
|
12
20
|
end
|
13
21
|
|
14
|
-
it "
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
it "initialize" do
|
23
|
+
all.each do |c|
|
24
|
+
expect(c.new).to be_a_kind_of(Digest::StringBuffer)
|
25
|
+
end
|
18
26
|
end
|
19
27
|
|
20
|
-
it "hexdigest" do
|
21
|
-
|
22
|
-
|
23
|
-
|
28
|
+
it "digest and hexdigest" do
|
29
|
+
all.each do |c|
|
30
|
+
[:digest, :hexdigest].each do |method|
|
31
|
+
str = "a" * 1024
|
32
|
+
d = c.send(method, str)
|
33
|
+
d2 = c.send(method, str, (c::DEFAULT_SEED.length == 4) ? seed32 : seed64)
|
34
|
+
expect(d).to be_a_kind_of(String)
|
35
|
+
expect(d2).to be_a_kind_of(String)
|
36
|
+
expect(d.length).to be > 0
|
37
|
+
expect(d2.length).to be > 0
|
38
|
+
expect(d != d2).to be true
|
39
|
+
end
|
40
|
+
end
|
24
41
|
end
|
25
42
|
|
26
43
|
it "rawdigest" do
|
27
|
-
|
28
|
-
|
29
|
-
|
44
|
+
all.each do |c|
|
45
|
+
str = "a" * 1024
|
46
|
+
d = c.rawdigest str
|
47
|
+
d2 = c.rawdigest str, (c::DEFAULT_SEED.length == 4) ? seed32 : seed64
|
48
|
+
expect(d).to be_a_kind_of(Integer)
|
49
|
+
expect(d2).to be_a_kind_of(Integer)
|
50
|
+
expect(d).to be > 0
|
51
|
+
expect(d2).to be > 0
|
52
|
+
expect(d != d2).to be true
|
53
|
+
end
|
30
54
|
end
|
31
55
|
|
32
|
-
it "update and reset and hexdigest" do
|
33
|
-
|
34
|
-
MurmurHash1 => "c709abd5",
|
35
|
-
MurmurHash2 => "33f67c7e",
|
36
|
-
MurmurHash2A => "df25554b",
|
37
|
-
}.each do |c, should|
|
56
|
+
it "update and reset and hexdigest(32bit)" do
|
57
|
+
all.each do |c|
|
38
58
|
murmur = c.new
|
39
59
|
murmur.update("m").update("u").update("r")
|
40
60
|
murmur << "m" << "u" << "r"
|
41
61
|
murmur << "hash"
|
42
|
-
|
43
|
-
expect(murmur.hexdigest).to
|
44
|
-
|
45
|
-
expect(murmur.hexdigest).to eq(
|
62
|
+
hex = murmur.hexdigest
|
63
|
+
expect(murmur.hexdigest! == hex).to be true
|
64
|
+
reset_str = "0" * ((c::DEFAULT_SEED.length == 4) ? 8 : 16)
|
65
|
+
expect(murmur.hexdigest).to eq(reset_str)
|
46
66
|
end
|
47
67
|
end
|
48
68
|
|
@@ -69,14 +89,14 @@ describe "Digest::MurmurHash1 and 2" do
|
|
69
89
|
end
|
70
90
|
|
71
91
|
it "length" do
|
72
|
-
|
73
|
-
|
74
|
-
|
92
|
+
all.each do |c|
|
93
|
+
expect(c.new.length == c::DEFAULT_SEED.length).to be true
|
94
|
+
end
|
75
95
|
end
|
76
96
|
|
77
97
|
it "to_i" do
|
78
|
-
|
79
|
-
|
80
|
-
|
98
|
+
all.each do |c|
|
99
|
+
expect(c.new.update("murmurhash").to_i).to be_a_kind_of(Integer)
|
100
|
+
end
|
81
101
|
end
|
82
102
|
end
|
data/spec/exception_spec.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe MurmurHash do
|
4
|
-
let :all do
|
5
|
-
[MurmurHash1, MurmurHash2, MurmurHash2A]
|
6
|
-
end
|
7
|
-
|
8
4
|
it "update nil" do
|
9
5
|
all.each do |c|
|
10
6
|
murmur = c.new
|
@@ -17,4 +13,23 @@ describe MurmurHash do
|
|
17
13
|
expect{ c.rawdigest }.to raise_error(ArgumentError)
|
18
14
|
end
|
19
15
|
end
|
16
|
+
|
17
|
+
it "seed digest" do
|
18
|
+
all.each do |c|
|
19
|
+
expect{ c.digest "", ""}.to raise_error(ArgumentError)
|
20
|
+
expect{ c.digest "", "\x00" }.to raise_error(ArgumentError)
|
21
|
+
seed = (c::DEFAULT_SEED.bytesize == 4) ? seed32 : seed64
|
22
|
+
expect{ c.digest "", seed }.to_not raise_error
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
it "seed instance" do
|
27
|
+
all.each do |c|
|
28
|
+
murmur = c.new
|
29
|
+
expect{ murmur.seed = "" }.to raise_error(ArgumentError)
|
30
|
+
expect{ murmur.seed = "\x00" }.to raise_error(ArgumentError)
|
31
|
+
seed = (c::DEFAULT_SEED.bytesize == 4) ? seed32 : seed64
|
32
|
+
expect{ murmur.seed = seed }.to_not raise_error
|
33
|
+
end
|
34
|
+
end
|
20
35
|
end
|
data/spec/mem_spec.rb
CHANGED
@@ -2,16 +2,13 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe MurmurHash do
|
4
4
|
it "gc safe" do
|
5
|
-
|
6
|
-
MurmurHash1 => "c709abd5",
|
7
|
-
MurmurHash2 => "33f67c7e",
|
8
|
-
MurmurHash2A => "df25554b",
|
9
|
-
}.each do |c, should|
|
5
|
+
all.each do |c|
|
10
6
|
murmur = c.new
|
7
|
+
init = murmur.to_s
|
11
8
|
GC.start
|
12
9
|
murmur.update("murmur")
|
13
10
|
GC.start
|
14
|
-
expect(murmur.update("hash").to_s).to
|
11
|
+
expect(murmur.update("hash").to_s != init).to be true
|
15
12
|
end
|
16
13
|
end
|
17
14
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,2 +1,23 @@
|
|
1
1
|
require 'digest/murmurhash'
|
2
2
|
include Digest
|
3
|
+
|
4
|
+
# The list of digest classes that the shared specs iterate over.
# Names are resolved from the top level, where `include Digest` makes the
# Digest constants visible.
def all
  %w[
    MurmurHash1
    MurmurHash2
    MurmurHash2A
    MurmurHash64A
    MurmurHash64B
    MurmurHashNeutral2
    MurmurHashAligned2
  ].map { |name| Object.const_get(name) }
end
|
15
|
+
|
16
|
+
# A fixed 4-byte seed string used by the specs: bytes 0, 1, 2, 3.
def seed32
  4.times.to_a.pack("C*")
end
|
19
|
+
|
20
|
+
# A fixed 8-byte seed string used by the specs: bytes 0 through 7.
def seed64
  8.times.to_a.pack("C*")
end
|
23
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-murmurhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ksss
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: digest-stringbuffer
|
@@ -72,15 +72,15 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ~>
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.
|
75
|
+
version: 0.8.3
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.
|
83
|
-
description: Digest::
|
82
|
+
version: 0.8.3
|
83
|
+
description: Digest::MurmurHash is class collections of use algorithm MurmurHash desiged
|
84
84
|
by Austin Appleby.
|
85
85
|
email: co000ri@gmail.com
|
86
86
|
executables: []
|
@@ -96,7 +96,7 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- digest-murmurhash.gemspec
|
98
98
|
- ext/digest/murmurhash/extconf.rb
|
99
|
-
- ext/digest/murmurhash/
|
99
|
+
- ext/digest/murmurhash/murmurhash.c
|
100
100
|
- ext/digest/murmurhash/murmurhash.h
|
101
101
|
- ext/digest/murmurhash/murmurhash1.c
|
102
102
|
- ext/digest/murmurhash/murmurhash1.h
|
@@ -104,6 +104,14 @@ files:
|
|
104
104
|
- ext/digest/murmurhash/murmurhash2.h
|
105
105
|
- ext/digest/murmurhash/murmurhash2a.c
|
106
106
|
- ext/digest/murmurhash/murmurhash2a.h
|
107
|
+
- ext/digest/murmurhash/murmurhash64a.c
|
108
|
+
- ext/digest/murmurhash/murmurhash64a.h
|
109
|
+
- ext/digest/murmurhash/murmurhash64b.c
|
110
|
+
- ext/digest/murmurhash/murmurhash64b.h
|
111
|
+
- ext/digest/murmurhash/murmurhash_aligned2.c
|
112
|
+
- ext/digest/murmurhash/murmurhash_aligned2.h
|
113
|
+
- ext/digest/murmurhash/murmurhash_neutral2.c
|
114
|
+
- ext/digest/murmurhash/murmurhash_neutral2.h
|
107
115
|
- lib/digest/murmurhash.rb
|
108
116
|
- lib/digest/murmurhash/version.rb
|
109
117
|
- spec/bench.rb
|
@@ -134,7 +142,7 @@ rubyforge_project:
|
|
134
142
|
rubygems_version: 2.1.11
|
135
143
|
signing_key:
|
136
144
|
specification_version: 4
|
137
|
-
summary: Digest::
|
145
|
+
summary: Digest::MurmurHash is class collections of use algorithm MurmurHash desiged
|
138
146
|
by Austin Appleby.
|
139
147
|
test_files:
|
140
148
|
- spec/bench.rb
|