digest-murmurhash 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +16 -0
- data/Rakefile +11 -1
- data/digest-murmurhash.gemspec +3 -3
- data/ext/digest/murmurhash/extconf.rb +1 -2
- data/ext/digest/murmurhash/murmurhash.c +253 -0
- data/ext/digest/murmurhash/murmurhash.h +52 -35
- data/ext/digest/murmurhash/murmurhash1.c +18 -54
- data/ext/digest/murmurhash/murmurhash2.c +17 -56
- data/ext/digest/murmurhash/murmurhash2a.c +18 -42
- data/ext/digest/murmurhash/murmurhash64a.c +87 -0
- data/ext/digest/murmurhash/murmurhash64a.h +13 -0
- data/ext/digest/murmurhash/murmurhash64b.c +93 -0
- data/ext/digest/murmurhash/murmurhash64b.h +13 -0
- data/ext/digest/murmurhash/murmurhash_aligned2.c +87 -0
- data/ext/digest/murmurhash/murmurhash_aligned2.h +13 -0
- data/ext/digest/murmurhash/murmurhash_neutral2.c +87 -0
- data/ext/digest/murmurhash/murmurhash_neutral2.h +13 -0
- data/lib/digest/murmurhash/version.rb +1 -1
- data/spec/bench.rb +21 -31
- data/spec/digest_spec.rb +54 -34
- data/spec/exception_spec.rb +19 -4
- data/spec/mem_spec.rb +3 -6
- data/spec/spec_helper.rb +21 -0
- metadata +15 -7
- data/ext/digest/murmurhash/init.c +0 -39
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifndef MURMURHASH_ALIGNED2_INCLUDED
|
2
|
+
# define MURMURHASH_ALIGNED2_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur_aligned2_finish(VALUE self);
|
7
|
+
VALUE murmur_aligned2_to_i(VALUE self);
|
8
|
+
VALUE murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
VALUE murmur_aligned2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
|
+
VALUE murmur_aligned2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
|
+
|
12
|
+
#endif /* ifndef MURMURHASH_ALIGNED2_INCLUDED */
|
13
|
+
|
@@ -0,0 +1,87 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHashNeutral2 (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash_neutral2.h"
|
6
|
+
|
7
|
+
/*
 * Endian-neutral MurmurHash2.
 *
 * Hashes the `len` bytes starting at `key`, mixed with `seed`, and returns
 * the 32-bit result. Input words are assembled byte by byte (least
 * significant byte first), so the result does not depend on host byte order
 * or on the alignment of `key`.
 *
 * `key` must point to at least `len` readable bytes; it is never
 * dereferenced when `len` is 0.
 */
uint32_t
murmur_hash_process_neutral2(const char * key, uint32_t len, uint32_t seed)
{
	const uint32_t m = 0x5bd1e995;
	const int r = 24;
	const unsigned char * data = (const unsigned char *)key;
	uint32_t h = seed ^ len;

	/* Mix the input four bytes at a time. */
	while (len >= 4) {
		uint32_t k;

		/*
		 * Widen each byte to uint32_t before shifting: an unsigned char
		 * promotes to (signed) int, and shifting a value >= 0x80 left by
		 * 24 bits would overflow int, which is undefined behaviour.
		 */
		k  = (uint32_t)data[0];
		k |= (uint32_t)data[1] << 8;
		k |= (uint32_t)data[2] << 16;
		k |= (uint32_t)data[3] << 24;

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		data += 4;
		len -= 4;
	}

	/* Fold in the remaining 0-3 tail bytes. */
	switch (len) {
	case 3:
		h ^= (uint32_t)data[2] << 16;
		/* fallthrough */
	case 2:
		h ^= (uint32_t)data[1] << 8;
		/* fallthrough */
	case 1:
		h ^= (uint32_t)data[0];
		h *= m;
		break;
	default:
		break;
	}

	/* Final avalanche of the accumulated state. */
	h ^= h >> 13;
	h *= m;
	h ^= h >> 15;

	return h;
}
|
49
|
+
|
50
|
+
VALUE
|
51
|
+
murmur_neutral2_finish(VALUE self)
|
52
|
+
{
|
53
|
+
uint8_t digest[4];
|
54
|
+
uint32_t h;
|
55
|
+
|
56
|
+
h = _murmur_finish32(self, murmur_hash_process_neutral2);
|
57
|
+
ASSINE_BY_ENDIAN_32(digest, h);
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
59
|
+
}
|
60
|
+
|
61
|
+
VALUE
|
62
|
+
murmur_neutral2_to_i(VALUE self)
|
63
|
+
{
|
64
|
+
return ULL2NUM(_murmur_finish32(self, murmur_hash_process_neutral2));
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE
|
68
|
+
murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
|
69
|
+
{
|
70
|
+
uint8_t digest[4];
|
71
|
+
uint64_t h;
|
72
|
+
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
|
73
|
+
ASSINE_BY_ENDIAN_32(digest, h);
|
74
|
+
return rb_str_new((const char*) digest, 4);
|
75
|
+
}
|
76
|
+
|
77
|
+
VALUE
|
78
|
+
murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass)
|
79
|
+
{
|
80
|
+
return hexencode_str_new(murmur_neutral2_s_digest(argc, argv, klass));
|
81
|
+
}
|
82
|
+
|
83
|
+
VALUE
|
84
|
+
murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
85
|
+
{
|
86
|
+
return ULL2NUM(_murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2));
|
87
|
+
}
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#ifndef MURMURHASH_NEUTRAL2_INCLUDED
|
2
|
+
# define MURMURHASH_NEUTRAL2_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur_neutral2_finish(VALUE self);
|
7
|
+
VALUE murmur_neutral2_to_i(VALUE self);
|
8
|
+
VALUE murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
VALUE murmur_neutral2_s_hexdigest(int argc, VALUE *argv, VALUE klass);
|
10
|
+
VALUE murmur_neutral2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
11
|
+
|
12
|
+
#endif /* ifndef MURMURHASH_NEUTRAL2_INCLUDED */
|
13
|
+
|
data/spec/bench.rb
CHANGED
@@ -7,18 +7,13 @@ require 'digest/stringbuffer'
|
|
7
7
|
require 'digest/murmurhash'
|
8
8
|
require 'benchmark'
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
buffer.unpack("C*").each do |c|
|
18
|
-
result += (c * @prime)
|
19
|
-
end
|
20
|
-
[result & 0xffffffff].pack("N")
|
21
|
-
end
|
10
|
+
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
11
|
+
# Builds a string of +length+ characters, each sampled from @rands.
def rand_str(length)
  chars = ""
  length.times do
    chars << @rands.sample
  end
  chars
end
|
23
18
|
|
24
19
|
class Integer
|
@@ -27,12 +22,12 @@ class Integer
|
|
27
22
|
end
|
28
23
|
end
|
29
24
|
|
30
|
-
def murmur_hash str
|
25
|
+
def murmur_hash str, seed
|
31
26
|
data = str.dup.unpack("C*")
|
32
27
|
m = 0x5bd1e995
|
33
28
|
r = 16
|
34
29
|
length = str.bytesize
|
35
|
-
h = (length * m).to_32
|
30
|
+
h = (seed ^ (length * m).to_32).to_32
|
36
31
|
|
37
32
|
while 4 <= length
|
38
33
|
d = data.shift(4).pack("C*").unpack("I")[0]
|
@@ -62,34 +57,29 @@ def murmur_hash str
|
|
62
57
|
h
|
63
58
|
end
|
64
59
|
|
65
|
-
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
66
|
-
def rand_str
|
67
|
-
rand = "";
|
68
|
-
20.times {
|
69
|
-
rand << @rands[rand(62)]
|
70
|
-
}
|
71
|
-
rand
|
72
|
-
end
|
73
|
-
|
74
60
|
n = 100000
|
75
61
|
times_enum = n.times
|
76
62
|
|
77
63
|
a = Array.new(n, 0)
|
78
64
|
n.times do |i|
|
79
|
-
a[i] = rand_str
|
65
|
+
a[i] = rand_str 20
|
80
66
|
end
|
81
|
-
|
67
|
+
seed = rand(2**32)
|
68
|
+
seed_str32 = [seed].pack("L")
|
69
|
+
seed_str64 = [seed].pack("Q")
|
82
70
|
c = Struct.new "Cases",
|
83
71
|
:name,
|
84
72
|
:func
|
85
73
|
cases = [
|
86
|
-
c.new("pureRuby", proc{|x| murmur_hash x }),
|
87
|
-
c.new("
|
88
|
-
c.new("
|
89
|
-
c.new("
|
90
|
-
c.new("
|
74
|
+
c.new("pureRuby", proc{|x| murmur_hash x, seed }),
|
75
|
+
c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x, seed_str32 }),
|
76
|
+
c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x, seed_str32 }),
|
77
|
+
c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x, seed_str32 }),
|
78
|
+
c.new("MurmurHash64A", proc{|x| Digest::MurmurHash64A.rawdigest x, seed_str64 }),
|
79
|
+
c.new("MurmurHash64B", proc{|x| Digest::MurmurHash64B.rawdigest x, seed_str64 }),
|
80
|
+
c.new("MurmurHashNeutral2", proc{|x| Digest::MurmurHashNeutral2.rawdigest x, seed_str32 }),
|
81
|
+
c.new("MurmurHashAligned2", proc{|x| Digest::MurmurHashAligned2.rawdigest x, seed_str32 }),
|
91
82
|
]
|
92
|
-
|
93
83
|
reals = {}
|
94
84
|
confrict = {}
|
95
85
|
confricts = {}
|
data/spec/digest_spec.rb
CHANGED
@@ -1,48 +1,68 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
4
|
-
|
5
|
-
|
3
|
+
describe Digest::MurmurHash do
|
4
|
+
it "seed" do
  # A freshly constructed digest must report its class's DEFAULT_SEED.
  all.each do |c|
    m = c.new
    expect(m.seed).to eq(c::DEFAULT_SEED)
  end
end
|
7
10
|
|
8
|
-
it "
|
9
|
-
|
10
|
-
|
11
|
-
|
11
|
+
it "seed=" do
  # Assigning a non-default seed must change both the stored seed and the digest.
  all.each do |c|
    m = c.new
    m.update "murmurhash"
    before_digest = m.hexdigest
    m.seed = (c::DEFAULT_SEED.length == 4) ? seed32 : seed64
    expect(m.seed).to_not eq(c::DEFAULT_SEED)
    expect(m.hexdigest).to_not eq(before_digest)
  end
end
|
13
21
|
|
14
|
-
it "
|
15
|
-
|
16
|
-
|
17
|
-
|
22
|
+
it "initialize" do
  # Every digest class must produce instances that are Digest::StringBuffer objects.
  all.each do |klass|
    instance = klass.new
    expect(instance).to be_a_kind_of(Digest::StringBuffer)
  end
end
|
19
27
|
|
20
|
-
it "hexdigest" do
|
21
|
-
|
22
|
-
|
23
|
-
|
28
|
+
it "digest and hexdigest" do
  # For both class-level methods: results are non-empty strings, and passing
  # an explicit seed must give a different result than the default.
  all.each do |c|
    [:digest, :hexdigest].each do |meth|
      str = "a" * 1024
      d = c.send(meth, str)
      d2 = c.send(meth, str, (c::DEFAULT_SEED.length == 4) ? seed32 : seed64)
      expect(d).to be_a_kind_of(String)
      expect(d2).to be_a_kind_of(String)
      expect(d.length).to be > 0
      expect(d2.length).to be > 0
      expect(d).to_not eq(d2)
    end
  end
end
|
25
42
|
|
26
43
|
it "rawdigest" do
  # rawdigest returns a positive Integer, and an explicit seed must change it.
  all.each do |c|
    str = "a" * 1024
    d = c.rawdigest str
    d2 = c.rawdigest str, (c::DEFAULT_SEED.length == 4) ? seed32 : seed64
    expect(d).to be_a_kind_of(Integer)
    expect(d2).to be_a_kind_of(Integer)
    expect(d).to be > 0
    expect(d2).to be > 0
    expect(d).to_not eq(d2)
  end
end
|
31
55
|
|
32
|
-
it "update and reset and hexdigest" do
|
33
|
-
|
34
|
-
MurmurHash1 => "c709abd5",
|
35
|
-
MurmurHash2 => "33f67c7e",
|
36
|
-
MurmurHash2A => "df25554b",
|
37
|
-
}.each do |c, should|
|
56
|
+
it "update and reset and hexdigest(32bit)" do
  # hexdigest! must return the same value as hexdigest; afterwards hexdigest
  # is expected to be all zeros (8 hex digits for 4-byte seeds, 16 otherwise).
  all.each do |c|
    murmur = c.new
    murmur.update("m").update("u").update("r")
    murmur << "m" << "u" << "r"
    murmur << "hash"
    hex = murmur.hexdigest
    expect(murmur.hexdigest!).to eq(hex)
    reset_str = "0" * ((c::DEFAULT_SEED.length == 4) ? 8 : 16)
    expect(murmur.hexdigest).to eq(reset_str)
  end
end
|
48
68
|
|
@@ -69,14 +89,14 @@ describe "Digest::MurmurHash1 and 2" do
|
|
69
89
|
end
|
70
90
|
|
71
91
|
it "length" do
  # An instance's length must match the length of its class's DEFAULT_SEED.
  all.each do |c|
    expect(c.new.length).to eq(c::DEFAULT_SEED.length)
  end
end
|
76
96
|
|
77
97
|
it "to_i" do
  # to_i on an updated digest must yield an Integer for every class.
  all.each do |klass|
    murmur = klass.new.update("murmurhash")
    expect(murmur.to_i).to be_a_kind_of(Integer)
  end
end
|
82
102
|
end
|
data/spec/exception_spec.rb
CHANGED
@@ -1,10 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe MurmurHash do
|
4
|
-
let :all do
|
5
|
-
[MurmurHash1, MurmurHash2, MurmurHash2A]
|
6
|
-
end
|
7
|
-
|
8
4
|
it "update nil" do
|
9
5
|
all.each do |c|
|
10
6
|
murmur = c.new
|
@@ -17,4 +13,23 @@ describe MurmurHash do
|
|
17
13
|
expect{ c.rawdigest }.to raise_error(ArgumentError)
|
18
14
|
end
|
19
15
|
end
|
16
|
+
|
17
|
+
it "seed digest" do
  all.each do |klass|
    # Empty and one-byte seeds are expected to raise ArgumentError.
    ["", "\x00"].each do |bad_seed|
      expect { klass.digest "", bad_seed }.to raise_error(ArgumentError)
    end
    # A seed as long as DEFAULT_SEED must be accepted.
    good_seed = (klass::DEFAULT_SEED.bytesize == 4) ? seed32 : seed64
    expect { klass.digest "", good_seed }.to_not raise_error
  end
end
|
25
|
+
|
26
|
+
it "seed instance" do
  all.each do |klass|
    murmur = klass.new
    # Empty and one-byte seeds are expected to raise ArgumentError.
    ["", "\x00"].each do |bad_seed|
      expect { murmur.seed = bad_seed }.to raise_error(ArgumentError)
    end
    # A seed as long as DEFAULT_SEED must be accepted.
    good_seed = (klass::DEFAULT_SEED.bytesize == 4) ? seed32 : seed64
    expect { murmur.seed = good_seed }.to_not raise_error
  end
end
|
20
35
|
end
|
data/spec/mem_spec.rb
CHANGED
@@ -2,16 +2,13 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe MurmurHash do
  # Runs the garbage collector between updates; the digest after updating
  # must differ from the digest of the fresh instance.
  it "gc safe" do
    all.each do |c|
      murmur = c.new
      init = murmur.to_s
      GC.start
      murmur.update("murmur")
      GC.start
      expect(murmur.update("hash").to_s).to_not eq(init)
    end
  end
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,2 +1,23 @@
|
|
1
1
|
require 'digest/murmurhash'
|
2
2
|
include Digest
|
3
|
+
|
4
|
+
# Every MurmurHash digest class exercised by the specs.
def all
  [
    MurmurHash1, MurmurHash2, MurmurHash2A,
    MurmurHash64A, MurmurHash64B,
    MurmurHashNeutral2, MurmurHashAligned2
  ]
end
|
15
|
+
|
16
|
+
# A fixed 4-byte seed: the bytes 0, 1, 2, 3.
def seed32
  # Exclusive range: exactly the four values consumed by "C4".
  (0...4).to_a.pack("C4")
end
|
19
|
+
|
20
|
+
# A fixed 8-byte seed: the bytes 0 through 7.
def seed64
  # Exclusive range: exactly the eight values consumed by "C8".
  (0...8).to_a.pack("C8")
end
|
23
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-murmurhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ksss
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: digest-stringbuffer
|
@@ -72,15 +72,15 @@ dependencies:
|
|
72
72
|
requirements:
|
73
73
|
- - ~>
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.
|
75
|
+
version: 0.8.3
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - ~>
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.
|
83
|
-
description: Digest::
|
82
|
+
version: 0.8.3
|
83
|
+
description: Digest::MurmurHash is class collections of use algorithm MurmurHash desiged
|
84
84
|
by Austin Appleby.
|
85
85
|
email: co000ri@gmail.com
|
86
86
|
executables: []
|
@@ -96,7 +96,7 @@ files:
|
|
96
96
|
- Rakefile
|
97
97
|
- digest-murmurhash.gemspec
|
98
98
|
- ext/digest/murmurhash/extconf.rb
|
99
|
-
- ext/digest/murmurhash/
|
99
|
+
- ext/digest/murmurhash/murmurhash.c
|
100
100
|
- ext/digest/murmurhash/murmurhash.h
|
101
101
|
- ext/digest/murmurhash/murmurhash1.c
|
102
102
|
- ext/digest/murmurhash/murmurhash1.h
|
@@ -104,6 +104,14 @@ files:
|
|
104
104
|
- ext/digest/murmurhash/murmurhash2.h
|
105
105
|
- ext/digest/murmurhash/murmurhash2a.c
|
106
106
|
- ext/digest/murmurhash/murmurhash2a.h
|
107
|
+
- ext/digest/murmurhash/murmurhash64a.c
|
108
|
+
- ext/digest/murmurhash/murmurhash64a.h
|
109
|
+
- ext/digest/murmurhash/murmurhash64b.c
|
110
|
+
- ext/digest/murmurhash/murmurhash64b.h
|
111
|
+
- ext/digest/murmurhash/murmurhash_aligned2.c
|
112
|
+
- ext/digest/murmurhash/murmurhash_aligned2.h
|
113
|
+
- ext/digest/murmurhash/murmurhash_neutral2.c
|
114
|
+
- ext/digest/murmurhash/murmurhash_neutral2.h
|
107
115
|
- lib/digest/murmurhash.rb
|
108
116
|
- lib/digest/murmurhash/version.rb
|
109
117
|
- spec/bench.rb
|
@@ -134,7 +142,7 @@ rubyforge_project:
|
|
134
142
|
rubygems_version: 2.1.11
|
135
143
|
signing_key:
|
136
144
|
specification_version: 4
|
137
|
-
summary: Digest::
|
145
|
+
summary: Digest::MurmurHash is class collections of use algorithm MurmurHash desiged
|
138
146
|
by Austin Appleby.
|
139
147
|
test_files:
|
140
148
|
- spec/bench.rb
|