digest-murmurhash 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +5 -3
- data/README.md +27 -42
- data/digest-murmurhash.gemspec +1 -1
- data/ext/digest/murmurhash/1.c +1 -1
- data/ext/digest/murmurhash/2.c +1 -1
- data/ext/digest/murmurhash/2a.c +1 -1
- data/ext/digest/murmurhash/aligned2.c +2 -2
- data/ext/digest/murmurhash/init.c +15 -6
- data/ext/digest/murmurhash/neutral2.c +1 -1
- data/lib/digest/murmurhash/base.rb +3 -3
- data/spec/bench.rb +39 -76
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b61d7d502181711a8b3b63e9b5609274e56c3b27
|
4
|
+
data.tar.gz: 66e152971a285ad197563d14dd480a8e03b5095a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a761f71b68f236f70acdd525322e99a1f0e7bb304fb22c164dd557a461963b9a62c85d017a509f7409c47ab8377f3475f4f0a248ff2d39e4ccd7eee4ffd60e0
|
7
|
+
data.tar.gz: 074fafcd53b409d3596af8d16bdf51ad2a8756c10d65974ec3a9365537d7fb6375ba6e163cc1938d99d18dbce1cd624e72cbf9b9bfcbc5f4ab09ac186d8f175e
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -50,50 +50,35 @@ p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> "41962e71"
|
|
50
50
|
- You can try *all* Hurmurhash algorithms
|
51
51
|
- You can use same interface built-in Digest::{MD5,SH1,...} classes.
|
52
52
|
|
53
|
-
##
|
54
|
-
|
55
|
-
```ruby
|
56
|
-
#! /usr/bin/env ruby
|
57
|
-
|
58
|
-
require 'benchmark'
|
59
|
-
require 'digest/md5'
|
60
|
-
require 'digest/sha1'
|
61
|
-
require 'digest/murmurhash'
|
62
|
-
require 'digest/siphash'
|
63
|
-
|
64
|
-
include Digest
|
65
|
-
|
66
|
-
n = 1000
|
67
|
-
str = "teststrings" * 1024 * 10
|
68
|
-
|
69
|
-
Benchmark.bm do |f|
|
70
|
-
[MurmurHash1, MurmurHash2, MurmurHash2A, MurmurHash64A, MurmurHash64B, MurmurHashNeutral2, MurmurHashAligned2, MurmurHash3_x86_32, MurmurHash3_x86_128, MurmurHash3_x64_128, SipHash, MD5, SHA1].each do |klass|
|
71
|
-
f.report(klass.to_s) {
|
72
|
-
i = 0
|
73
|
-
while i < n
|
74
|
-
klass.digest(str)
|
75
|
-
i += 1
|
76
|
-
end
|
77
|
-
}
|
78
|
-
end
|
79
|
-
end
|
80
|
-
```
|
53
|
+
## Benchmark
|
81
54
|
|
82
55
|
```
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
56
|
+
$ bundle ex ruby spec/bench.rb
|
57
|
+
### condition
|
58
|
+
|
59
|
+
RUBY_VERSION = 2.2.2
|
60
|
+
count = 100
|
61
|
+
data size = 1024 KB
|
62
|
+
|
63
|
+
### benchmark
|
64
|
+
|
65
|
+
MurmurHash1 0.05145844700746238
|
66
|
+
MurmurHash2 0.03399810096016154
|
67
|
+
MurmurHash2A 0.032202123024035245
|
68
|
+
MurmurHash64A 0.01880742999492213
|
69
|
+
MurmurHash64B 0.02227930899243802
|
70
|
+
MurmurHashNeutral2 0.04134096298366785
|
71
|
+
MurmurHashAligned2 0.03514020598959178
|
72
|
+
MurmurHash3_x86_32 0.1174831380485557
|
73
|
+
MurmurHash3_x86_128 0.1244338889955543
|
74
|
+
MurmurHash3_x64_128 0.07122775202151388
|
75
|
+
Digest::MD5 0.1401238819817081
|
76
|
+
Digest::SHA1 0.09980463003739715
|
77
|
+
Digest::SHA256 0.2248609460075386
|
78
|
+
Digest::SHA2 0.2245309569989331
|
79
|
+
OpenSSL::HMAC(sha256) 0.23656713595846668
|
80
|
+
Base64 0.1765239799860865
|
81
|
+
Zlib.crc32 0.009485946036875248
|
97
82
|
```
|
98
83
|
|
99
84
|
## Installation
|
data/digest-murmurhash.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |spec|
|
4
4
|
spec.name = "digest-murmurhash"
|
5
|
-
spec.version = "1.1.0"
|
5
|
+
spec.version = "1.1.1"
|
6
6
|
spec.authors = ["ksss"]
|
7
7
|
spec.email = ["co000ri@gmail.com"]
|
8
8
|
spec.description = %q{digest-murmurhash is class collections of use algorithm MurmurHash desiged by Austin Appleby.}
|
data/ext/digest/murmurhash/1.c
CHANGED
data/ext/digest/murmurhash/2.c
CHANGED
data/ext/digest/murmurhash/2a.c
CHANGED
@@ -16,7 +16,7 @@ murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
|
|
16
16
|
|
17
17
|
uint32_t h = seed ^ len;
|
18
18
|
|
19
|
-
|
19
|
+
uint32_t align = (uint64_t)data & 3;
|
20
20
|
|
21
21
|
if(align && (len >= 4))
|
22
22
|
{
|
@@ -150,7 +150,7 @@ VALUE
|
|
150
150
|
murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
|
151
151
|
{
|
152
152
|
uint8_t digest[4];
|
153
|
-
|
153
|
+
uint32_t h;
|
154
154
|
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
|
155
155
|
assign_by_endian_32(digest, h);
|
156
156
|
return rb_str_new((const char*) digest, 4);
|
@@ -111,13 +111,22 @@ assign_by_endian_128(uint8_t *digest, void *out)
|
|
111
111
|
}
|
112
112
|
}
|
113
113
|
|
114
|
+
static uint32_t
|
115
|
+
rstring2uint32_t(VALUE str)
|
116
|
+
{
|
117
|
+
long len = RSTRING_LEN(str);
|
118
|
+
if (UINT32_MAX < len) {
|
119
|
+
rb_raise(rb_eRangeError, "String length=%ld will overflow from long to uint32_t", len);
|
120
|
+
}
|
121
|
+
return (uint32_t)len;
|
122
|
+
}
|
114
123
|
|
115
124
|
uint32_t
|
116
125
|
_murmur_finish32(VALUE self, uint32_t (*process)(const char*, uint32_t, uint32_t))
|
117
126
|
{
|
118
127
|
const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
|
119
128
|
VALUE buffer = rb_ivar_get(self, iv_buffer);
|
120
|
-
return process(RSTRING_PTR(buffer),
|
129
|
+
return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed);
|
121
130
|
}
|
122
131
|
|
123
132
|
uint64_t
|
@@ -125,7 +134,7 @@ _murmur_finish64(VALUE self, uint64_t (*process)(const char*, uint32_t, uint64_t
|
|
125
134
|
{
|
126
135
|
const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
|
127
136
|
VALUE buffer = rb_ivar_get(self, iv_buffer);
|
128
|
-
return process(RSTRING_PTR(buffer),
|
137
|
+
return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint64_t*)seed);
|
129
138
|
}
|
130
139
|
|
131
140
|
void
|
@@ -133,7 +142,7 @@ _murmur_finish128(VALUE self, void *out, void (*process)(const char*, uint32_t,
|
|
133
142
|
{
|
134
143
|
const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
|
135
144
|
VALUE buffer = rb_ivar_get(self, iv_buffer);
|
136
|
-
process(RSTRING_PTR(buffer),
|
145
|
+
process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed, out);
|
137
146
|
}
|
138
147
|
|
139
148
|
uint32_t
|
@@ -159,7 +168,7 @@ _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const
|
|
159
168
|
seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
|
160
169
|
}
|
161
170
|
|
162
|
-
return process(RSTRING_PTR(str),
|
171
|
+
return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed);
|
163
172
|
}
|
164
173
|
|
165
174
|
uint64_t
|
@@ -185,7 +194,7 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
|
|
185
194
|
seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
|
186
195
|
}
|
187
196
|
|
188
|
-
return process(RSTRING_PTR(str),
|
197
|
+
return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint64_t*)seed);
|
189
198
|
}
|
190
199
|
|
191
200
|
void
|
@@ -212,7 +221,7 @@ _murmur_s_digest128(int argc, VALUE *argv, VALUE klass, void *out, void (*proces
|
|
212
221
|
seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
|
213
222
|
}
|
214
223
|
|
215
|
-
process(RSTRING_PTR(str),
|
224
|
+
process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed, out);
|
216
225
|
}
|
217
226
|
|
218
227
|
|
@@ -62,7 +62,7 @@ VALUE
|
|
62
62
|
murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
|
63
63
|
{
|
64
64
|
uint8_t digest[4];
|
65
|
-
|
65
|
+
uint32_t h;
|
66
66
|
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
|
67
67
|
assign_by_endian_32(digest, h);
|
68
68
|
return rb_str_new((const char*) digest, 4);
|
@@ -17,7 +17,7 @@ module Digest
|
|
17
17
|
}.each do |name, s|
|
18
18
|
class_eval %Q{
|
19
19
|
class MurmurHash#{name} < Digest::Class
|
20
|
-
DEFAULT_SEED = "#{"\x00" * s.seed_length}".
|
20
|
+
DEFAULT_SEED = "#{"\x00" * s.seed_length}".b
|
21
21
|
|
22
22
|
def initialize
|
23
23
|
@buffer = ""
|
@@ -25,7 +25,7 @@ module Digest
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def update(str)
|
28
|
-
@buffer
|
28
|
+
@buffer << str
|
29
29
|
self
|
30
30
|
end
|
31
31
|
alias << update
|
@@ -41,7 +41,7 @@ module Digest
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def seed=(s)
|
44
|
-
|
44
|
+
raise ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
|
45
45
|
@seed = s
|
46
46
|
end
|
47
47
|
|
data/spec/bench.rb
CHANGED
@@ -4,6 +4,10 @@ lib = File.expand_path('../../lib', __FILE__)
|
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
6
|
require 'digest/murmurhash'
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'digest/sha1'
|
9
|
+
require 'openssl'
|
10
|
+
require 'zlib'
|
7
11
|
require 'benchmark'
|
8
12
|
|
9
13
|
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
@@ -15,53 +19,6 @@ def rand_str length
|
|
15
19
|
rand
|
16
20
|
end
|
17
21
|
|
18
|
-
class Integer
|
19
|
-
def to_32
|
20
|
-
self & 0xffffffff
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def murmur_hash str, seed
|
25
|
-
data = str.dup.unpack("C*")
|
26
|
-
m = 0x5bd1e995
|
27
|
-
r = 16
|
28
|
-
length = str.bytesize
|
29
|
-
h = (seed ^ (length * m).to_32).to_32
|
30
|
-
|
31
|
-
while 4 <= length
|
32
|
-
d = data.shift(4).pack("C*").unpack("I")[0]
|
33
|
-
h = (h + d).to_32
|
34
|
-
h = (h * m).to_32
|
35
|
-
h ^= h >> r
|
36
|
-
length -= 4
|
37
|
-
end
|
38
|
-
|
39
|
-
if 2 < length
|
40
|
-
h = (h + (data[2] << 16).to_32).to_32
|
41
|
-
end
|
42
|
-
if 1 < length
|
43
|
-
h = (h + (data[1] << 8).to_32).to_32
|
44
|
-
end
|
45
|
-
if 0 < length
|
46
|
-
h = (h + data[0]).to_32
|
47
|
-
h = (h * m).to_32
|
48
|
-
h ^= h >> r
|
49
|
-
end
|
50
|
-
|
51
|
-
h = (h * m).to_32
|
52
|
-
h ^= h >> 10
|
53
|
-
h = (h * m).to_32
|
54
|
-
h ^= h >> 17
|
55
|
-
|
56
|
-
h
|
57
|
-
end
|
58
|
-
|
59
|
-
n = 100000
|
60
|
-
|
61
|
-
a = Array.new(n, 0)
|
62
|
-
n.times do |i|
|
63
|
-
a[i] = rand_str 20
|
64
|
-
end
|
65
22
|
seed = rand(2**32)
|
66
23
|
seed_str32 = [seed].pack("L")
|
67
24
|
seed_str64 = [seed].pack("Q")
|
@@ -69,7 +26,6 @@ c = Struct.new "Cases",
|
|
69
26
|
:name,
|
70
27
|
:func
|
71
28
|
cases = [
|
72
|
-
c.new("pureRuby", proc{|x| murmur_hash x, seed }),
|
73
29
|
c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.digest x, seed_str32 }),
|
74
30
|
c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.digest x, seed_str32 }),
|
75
31
|
c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.digest x, seed_str32 }),
|
@@ -80,53 +36,60 @@ cases = [
|
|
80
36
|
c.new("MurmurHash3_x86_32", proc{|x| Digest::MurmurHash3_x86_32.digest x, seed_str32 }),
|
81
37
|
c.new("MurmurHash3_x86_128", proc{|x| Digest::MurmurHash3_x86_128.digest x, seed_str32 }),
|
82
38
|
c.new("MurmurHash3_x64_128", proc{|x| Digest::MurmurHash3_x64_128.digest x, seed_str32 }),
|
39
|
+
c.new("Digest::MD5", proc{|x| Digest::MD5.digest x }),
|
40
|
+
c.new("Digest::SHA1", proc{|x| Digest::SHA1.digest x }),
|
41
|
+
c.new("Digest::SHA256", proc{|x| Digest::SHA256.digest x }),
|
42
|
+
c.new("Digest::SHA2", proc{|x| Digest::SHA2.digest x }),
|
43
|
+
c.new("OpenSSL::HMAC(sha256)", proc{|x| OpenSSL::HMAC.digest "sha256", seed_str32, x }),
|
44
|
+
c.new("Base64", proc{|x| [x].pack("m0") }),
|
45
|
+
c.new("Zlib.crc32", proc{|x| Zlib.crc32(x) }),
|
83
46
|
]
|
84
|
-
|
47
|
+
|
85
48
|
confrict = {}
|
86
49
|
confricts = {}
|
87
50
|
|
51
|
+
n = 100
|
52
|
+
a = Array.new(n, 0)
|
53
|
+
n.times do |i|
|
54
|
+
a[i] = rand_str(1024*1024)
|
55
|
+
end
|
56
|
+
|
88
57
|
puts "### condition"
|
89
58
|
puts
|
90
59
|
puts " RUBY_VERSION = #{RUBY_VERSION}"
|
91
60
|
puts " count = #{n}"
|
61
|
+
puts " data size = #{a[0].length / 1024} KB"
|
92
62
|
puts
|
93
63
|
puts "### benchmark"
|
94
64
|
puts
|
95
65
|
puts "```"
|
96
|
-
Benchmark.bm do |x|
|
97
|
-
cases.each do |c|
|
98
|
-
i = 0
|
99
|
-
z = x.report c.name do
|
100
|
-
while i < n
|
101
|
-
c.func.call(a[i])
|
102
|
-
i += 1
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
confrict.clear
|
107
66
|
|
108
|
-
|
67
|
+
GC.start
|
68
|
+
cases.each do |c|
|
69
|
+
i = 0
|
70
|
+
b = Benchmark.realtime do
|
109
71
|
while i < n
|
110
|
-
|
111
|
-
if confrict[rethash].nil?
|
112
|
-
confrict[rethash] = 0
|
113
|
-
else
|
114
|
-
confrict[rethash] += 1
|
115
|
-
end
|
72
|
+
c.func.call(a[i])
|
116
73
|
i += 1
|
117
74
|
end
|
118
|
-
reals[c.name] = z.real
|
119
|
-
confricts[c.name] = confrict.count{|hash, count| 0 < count}
|
120
75
|
end
|
121
|
-
|
122
|
-
puts "```"
|
76
|
+
puts "#{c.name}\t#{b}"
|
123
77
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
78
|
+
confrict.clear
|
79
|
+
|
80
|
+
i = 0
|
81
|
+
while i < n
|
82
|
+
rethash = c.func.call(a[i])
|
83
|
+
if confrict[rethash].nil?
|
84
|
+
confrict[rethash] = 0
|
85
|
+
else
|
86
|
+
confrict[rethash] += 1
|
87
|
+
end
|
88
|
+
i += 1
|
89
|
+
end
|
90
|
+
confricts[c.name] = confrict.count{|hash, count| 0 < count}
|
129
91
|
end
|
92
|
+
puts "```"
|
130
93
|
|
131
94
|
puts
|
132
95
|
puts "### confrict count (/#{n})"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-murmurhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ksss
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
version: '0'
|
123
123
|
requirements: []
|
124
124
|
rubyforge_project:
|
125
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.5.1
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: digest-murmurhash is class collections of use algorithm MurmurHash desiged
|