digest-murmurhash 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +5 -3
- data/README.md +27 -42
- data/digest-murmurhash.gemspec +1 -1
- data/ext/digest/murmurhash/1.c +1 -1
- data/ext/digest/murmurhash/2.c +1 -1
- data/ext/digest/murmurhash/2a.c +1 -1
- data/ext/digest/murmurhash/aligned2.c +2 -2
- data/ext/digest/murmurhash/init.c +15 -6
- data/ext/digest/murmurhash/neutral2.c +1 -1
- data/lib/digest/murmurhash/base.rb +3 -3
- data/spec/bench.rb +39 -76
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b61d7d502181711a8b3b63e9b5609274e56c3b27
|
4
|
+
data.tar.gz: 66e152971a285ad197563d14dd480a8e03b5095a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1a761f71b68f236f70acdd525322e99a1f0e7bb304fb22c164dd557a461963b9a62c85d017a509f7409c47ab8377f3475f4f0a248ff2d39e4ccd7eee4ffd60e0
|
7
|
+
data.tar.gz: 074fafcd53b409d3596af8d16bdf51ad2a8756c10d65974ec3a9365537d7fb6375ba6e163cc1938d99d18dbce1cd624e72cbf9b9bfcbc5f4ab09ac186d8f175e
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -50,50 +50,35 @@ p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> "41962e71"
|
|
50
50
|
- You can try *all* MurmurHash algorithms
|
51
51
|
- You can use the same interface as the built-in Digest::{MD5,SHA1,...} classes.
|
52
52
|
|
53
|
-
##
|
54
|
-
|
55
|
-
```ruby
|
56
|
-
#! /usr/bin/env ruby
|
57
|
-
|
58
|
-
require 'benchmark'
|
59
|
-
require 'digest/md5'
|
60
|
-
require 'digest/sha1'
|
61
|
-
require 'digest/murmurhash'
|
62
|
-
require 'digest/siphash'
|
63
|
-
|
64
|
-
include Digest
|
65
|
-
|
66
|
-
n = 1000
|
67
|
-
str = "teststrings" * 1024 * 10
|
68
|
-
|
69
|
-
Benchmark.bm do |f|
|
70
|
-
[MurmurHash1, MurmurHash2, MurmurHash2A, MurmurHash64A, MurmurHash64B, MurmurHashNeutral2, MurmurHashAligned2, MurmurHash3_x86_32, MurmurHash3_x86_128, MurmurHash3_x64_128, SipHash, MD5, SHA1].each do |klass|
|
71
|
-
f.report(klass.to_s) {
|
72
|
-
i = 0
|
73
|
-
while i < n
|
74
|
-
klass.digest(str)
|
75
|
-
i += 1
|
76
|
-
end
|
77
|
-
}
|
78
|
-
end
|
79
|
-
end
|
80
|
-
```
|
53
|
+
## Benchmark
|
81
54
|
|
82
55
|
```
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
56
|
+
$ bundle ex ruby spec/bench.rb
|
57
|
+
### condition
|
58
|
+
|
59
|
+
RUBY_VERSION = 2.2.2
|
60
|
+
count = 100
|
61
|
+
data size = 1024 KB
|
62
|
+
|
63
|
+
### benchmark
|
64
|
+
|
65
|
+
MurmurHash1 0.05145844700746238
|
66
|
+
MurmurHash2 0.03399810096016154
|
67
|
+
MurmurHash2A 0.032202123024035245
|
68
|
+
MurmurHash64A 0.01880742999492213
|
69
|
+
MurmurHash64B 0.02227930899243802
|
70
|
+
MurmurHashNeutral2 0.04134096298366785
|
71
|
+
MurmurHashAligned2 0.03514020598959178
|
72
|
+
MurmurHash3_x86_32 0.1174831380485557
|
73
|
+
MurmurHash3_x86_128 0.1244338889955543
|
74
|
+
MurmurHash3_x64_128 0.07122775202151388
|
75
|
+
Digest::MD5 0.1401238819817081
|
76
|
+
Digest::SHA1 0.09980463003739715
|
77
|
+
Digest::SHA256 0.2248609460075386
|
78
|
+
Digest::SHA2 0.2245309569989331
|
79
|
+
OpenSSL::HMAC(sha256) 0.23656713595846668
|
80
|
+
Base64 0.1765239799860865
|
81
|
+
Zlib.crc32 0.009485946036875248
|
97
82
|
```
|
98
83
|
|
99
84
|
## Installation
|
data/digest-murmurhash.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |spec|
|
4
4
|
spec.name = "digest-murmurhash"
|
5
|
-
spec.version = "1.1.
|
5
|
+
spec.version = "1.1.1"
|
6
6
|
spec.authors = ["ksss"]
|
7
7
|
spec.email = ["co000ri@gmail.com"]
|
8
8
|
spec.description = %q{digest-murmurhash is class collections of use algorithm MurmurHash desiged by Austin Appleby.}
|
data/ext/digest/murmurhash/1.c
CHANGED
data/ext/digest/murmurhash/2.c
CHANGED
data/ext/digest/murmurhash/2a.c
CHANGED
@@ -16,7 +16,7 @@ murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
|
|
16
16
|
|
17
17
|
uint32_t h = seed ^ len;
|
18
18
|
|
19
|
-
|
19
|
+
uint32_t align = (uint64_t)data & 3;
|
20
20
|
|
21
21
|
if(align && (len >= 4))
|
22
22
|
{
|
@@ -150,7 +150,7 @@ VALUE
|
|
150
150
|
murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
|
151
151
|
{
|
152
152
|
uint8_t digest[4];
|
153
|
-
|
153
|
+
uint32_t h;
|
154
154
|
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
|
155
155
|
assign_by_endian_32(digest, h);
|
156
156
|
return rb_str_new((const char*) digest, 4);
|
@@ -111,13 +111,22 @@ assign_by_endian_128(uint8_t *digest, void *out)
|
|
111
111
|
}
|
112
112
|
}
|
113
113
|
|
114
|
+
static uint32_t
|
115
|
+
rstring2uint32_t(VALUE str)
|
116
|
+
{
|
117
|
+
long len = RSTRING_LEN(str);
|
118
|
+
if (UINT32_MAX < len) {
|
119
|
+
rb_raise(rb_eRangeError, "String length=%ld will overflow from long to uint32_t", len);
|
120
|
+
}
|
121
|
+
return (uint32_t)len;
|
122
|
+
}
|
114
123
|
|
115
124
|
uint32_t
|
116
125
|
_murmur_finish32(VALUE self, uint32_t (*process)(const char*, uint32_t, uint32_t))
|
117
126
|
{
|
118
127
|
const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
|
119
128
|
VALUE buffer = rb_ivar_get(self, iv_buffer);
|
120
|
-
return process(RSTRING_PTR(buffer),
|
129
|
+
return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed);
|
121
130
|
}
|
122
131
|
|
123
132
|
uint64_t
|
@@ -125,7 +134,7 @@ _murmur_finish64(VALUE self, uint64_t (*process)(const char*, uint32_t, uint64_t
|
|
125
134
|
{
|
126
135
|
const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
|
127
136
|
VALUE buffer = rb_ivar_get(self, iv_buffer);
|
128
|
-
return process(RSTRING_PTR(buffer),
|
137
|
+
return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint64_t*)seed);
|
129
138
|
}
|
130
139
|
|
131
140
|
void
|
@@ -133,7 +142,7 @@ _murmur_finish128(VALUE self, void *out, void (*process)(const char*, uint32_t,
|
|
133
142
|
{
|
134
143
|
const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
|
135
144
|
VALUE buffer = rb_ivar_get(self, iv_buffer);
|
136
|
-
process(RSTRING_PTR(buffer),
|
145
|
+
process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed, out);
|
137
146
|
}
|
138
147
|
|
139
148
|
uint32_t
|
@@ -159,7 +168,7 @@ _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const
|
|
159
168
|
seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
|
160
169
|
}
|
161
170
|
|
162
|
-
return process(RSTRING_PTR(str),
|
171
|
+
return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed);
|
163
172
|
}
|
164
173
|
|
165
174
|
uint64_t
|
@@ -185,7 +194,7 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
|
|
185
194
|
seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
|
186
195
|
}
|
187
196
|
|
188
|
-
return process(RSTRING_PTR(str),
|
197
|
+
return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint64_t*)seed);
|
189
198
|
}
|
190
199
|
|
191
200
|
void
|
@@ -212,7 +221,7 @@ _murmur_s_digest128(int argc, VALUE *argv, VALUE klass, void *out, void (*proces
|
|
212
221
|
seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
|
213
222
|
}
|
214
223
|
|
215
|
-
process(RSTRING_PTR(str),
|
224
|
+
process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed, out);
|
216
225
|
}
|
217
226
|
|
218
227
|
|
@@ -62,7 +62,7 @@ VALUE
|
|
62
62
|
murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
|
63
63
|
{
|
64
64
|
uint8_t digest[4];
|
65
|
-
|
65
|
+
uint32_t h;
|
66
66
|
h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
|
67
67
|
assign_by_endian_32(digest, h);
|
68
68
|
return rb_str_new((const char*) digest, 4);
|
@@ -17,7 +17,7 @@ module Digest
|
|
17
17
|
}.each do |name, s|
|
18
18
|
class_eval %Q{
|
19
19
|
class MurmurHash#{name} < Digest::Class
|
20
|
-
DEFAULT_SEED = "#{"\x00" * s.seed_length}".
|
20
|
+
DEFAULT_SEED = "#{"\x00" * s.seed_length}".b
|
21
21
|
|
22
22
|
def initialize
|
23
23
|
@buffer = ""
|
@@ -25,7 +25,7 @@ module Digest
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def update(str)
|
28
|
-
@buffer
|
28
|
+
@buffer << str
|
29
29
|
self
|
30
30
|
end
|
31
31
|
alias << update
|
@@ -41,7 +41,7 @@ module Digest
|
|
41
41
|
end
|
42
42
|
|
43
43
|
def seed=(s)
|
44
|
-
|
44
|
+
raise ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
|
45
45
|
@seed = s
|
46
46
|
end
|
47
47
|
|
data/spec/bench.rb
CHANGED
@@ -4,6 +4,10 @@ lib = File.expand_path('../../lib', __FILE__)
|
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
6
|
require 'digest/murmurhash'
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'digest/sha1'
|
9
|
+
require 'openssl'
|
10
|
+
require 'zlib'
|
7
11
|
require 'benchmark'
|
8
12
|
|
9
13
|
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
@@ -15,53 +19,6 @@ def rand_str length
|
|
15
19
|
rand
|
16
20
|
end
|
17
21
|
|
18
|
-
class Integer
|
19
|
-
def to_32
|
20
|
-
self & 0xffffffff
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
def murmur_hash str, seed
|
25
|
-
data = str.dup.unpack("C*")
|
26
|
-
m = 0x5bd1e995
|
27
|
-
r = 16
|
28
|
-
length = str.bytesize
|
29
|
-
h = (seed ^ (length * m).to_32).to_32
|
30
|
-
|
31
|
-
while 4 <= length
|
32
|
-
d = data.shift(4).pack("C*").unpack("I")[0]
|
33
|
-
h = (h + d).to_32
|
34
|
-
h = (h * m).to_32
|
35
|
-
h ^= h >> r
|
36
|
-
length -= 4
|
37
|
-
end
|
38
|
-
|
39
|
-
if 2 < length
|
40
|
-
h = (h + (data[2] << 16).to_32).to_32
|
41
|
-
end
|
42
|
-
if 1 < length
|
43
|
-
h = (h + (data[1] << 8).to_32).to_32
|
44
|
-
end
|
45
|
-
if 0 < length
|
46
|
-
h = (h + data[0]).to_32
|
47
|
-
h = (h * m).to_32
|
48
|
-
h ^= h >> r
|
49
|
-
end
|
50
|
-
|
51
|
-
h = (h * m).to_32
|
52
|
-
h ^= h >> 10
|
53
|
-
h = (h * m).to_32
|
54
|
-
h ^= h >> 17
|
55
|
-
|
56
|
-
h
|
57
|
-
end
|
58
|
-
|
59
|
-
n = 100000
|
60
|
-
|
61
|
-
a = Array.new(n, 0)
|
62
|
-
n.times do |i|
|
63
|
-
a[i] = rand_str 20
|
64
|
-
end
|
65
22
|
seed = rand(2**32)
|
66
23
|
seed_str32 = [seed].pack("L")
|
67
24
|
seed_str64 = [seed].pack("Q")
|
@@ -69,7 +26,6 @@ c = Struct.new "Cases",
|
|
69
26
|
:name,
|
70
27
|
:func
|
71
28
|
cases = [
|
72
|
-
c.new("pureRuby", proc{|x| murmur_hash x, seed }),
|
73
29
|
c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.digest x, seed_str32 }),
|
74
30
|
c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.digest x, seed_str32 }),
|
75
31
|
c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.digest x, seed_str32 }),
|
@@ -80,53 +36,60 @@ cases = [
|
|
80
36
|
c.new("MurmurHash3_x86_32", proc{|x| Digest::MurmurHash3_x86_32.digest x, seed_str32 }),
|
81
37
|
c.new("MurmurHash3_x86_128", proc{|x| Digest::MurmurHash3_x86_128.digest x, seed_str32 }),
|
82
38
|
c.new("MurmurHash3_x64_128", proc{|x| Digest::MurmurHash3_x64_128.digest x, seed_str32 }),
|
39
|
+
c.new("Digest::MD5", proc{|x| Digest::MD5.digest x }),
|
40
|
+
c.new("Digest::SHA1", proc{|x| Digest::SHA1.digest x }),
|
41
|
+
c.new("Digest::SHA256", proc{|x| Digest::SHA256.digest x }),
|
42
|
+
c.new("Digest::SHA2", proc{|x| Digest::SHA2.digest x }),
|
43
|
+
c.new("OpenSSL::HMAC(sha256)", proc{|x| OpenSSL::HMAC.digest "sha256", seed_str32, x }),
|
44
|
+
c.new("Base64", proc{|x| [x].pack("m0") }),
|
45
|
+
c.new("Zlib.crc32", proc{|x| Zlib.crc32(x) }),
|
83
46
|
]
|
84
|
-
|
47
|
+
|
85
48
|
confrict = {}
|
86
49
|
confricts = {}
|
87
50
|
|
51
|
+
n = 100
|
52
|
+
a = Array.new(n, 0)
|
53
|
+
n.times do |i|
|
54
|
+
a[i] = rand_str(1024*1024)
|
55
|
+
end
|
56
|
+
|
88
57
|
puts "### condition"
|
89
58
|
puts
|
90
59
|
puts " RUBY_VERSION = #{RUBY_VERSION}"
|
91
60
|
puts " count = #{n}"
|
61
|
+
puts " data size = #{a[0].length / 1024} KB"
|
92
62
|
puts
|
93
63
|
puts "### benchmark"
|
94
64
|
puts
|
95
65
|
puts "```"
|
96
|
-
Benchmark.bm do |x|
|
97
|
-
cases.each do |c|
|
98
|
-
i = 0
|
99
|
-
z = x.report c.name do
|
100
|
-
while i < n
|
101
|
-
c.func.call(a[i])
|
102
|
-
i += 1
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
confrict.clear
|
107
66
|
|
108
|
-
|
67
|
+
GC.start
|
68
|
+
cases.each do |c|
|
69
|
+
i = 0
|
70
|
+
b = Benchmark.realtime do
|
109
71
|
while i < n
|
110
|
-
|
111
|
-
if confrict[rethash].nil?
|
112
|
-
confrict[rethash] = 0
|
113
|
-
else
|
114
|
-
confrict[rethash] += 1
|
115
|
-
end
|
72
|
+
c.func.call(a[i])
|
116
73
|
i += 1
|
117
74
|
end
|
118
|
-
reals[c.name] = z.real
|
119
|
-
confricts[c.name] = confrict.count{|hash, count| 0 < count}
|
120
75
|
end
|
121
|
-
|
122
|
-
puts "```"
|
76
|
+
puts "#{c.name}\t#{b}"
|
123
77
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
78
|
+
confrict.clear
|
79
|
+
|
80
|
+
i = 0
|
81
|
+
while i < n
|
82
|
+
rethash = c.func.call(a[i])
|
83
|
+
if confrict[rethash].nil?
|
84
|
+
confrict[rethash] = 0
|
85
|
+
else
|
86
|
+
confrict[rethash] += 1
|
87
|
+
end
|
88
|
+
i += 1
|
89
|
+
end
|
90
|
+
confricts[c.name] = confrict.count{|hash, count| 0 < count}
|
129
91
|
end
|
92
|
+
puts "```"
|
130
93
|
|
131
94
|
puts
|
132
95
|
puts "### confrict count (/#{n})"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-murmurhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ksss
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-01-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
version: '0'
|
123
123
|
requirements: []
|
124
124
|
rubyforge_project:
|
125
|
-
rubygems_version: 2.
|
125
|
+
rubygems_version: 2.5.1
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
128
|
summary: digest-murmurhash is class collections of use algorithm MurmurHash desiged
|