digest-murmurhash 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd9a5f608b1cac914b791b811bc75eba224ca841
4
- data.tar.gz: ed6533e449b132c385c3457708c52d556d47114d
3
+ metadata.gz: b61d7d502181711a8b3b63e9b5609274e56c3b27
4
+ data.tar.gz: 66e152971a285ad197563d14dd480a8e03b5095a
5
5
  SHA512:
6
- metadata.gz: 54555dc73a82614b6fe9ba109662e96d4dad18b8e9ed30a073d9cde49d611b256158381d9c61280a9dbe7783e8db4967ccc8de2af9a2a507c71bdca87c99730b
7
- data.tar.gz: 4b47b42486c884ddb3edee9b7c4f1ec11529657baf471492e45c113abc6e7dba622a5e7e7cc664480c9790b139fe49942e091c3257f0134635316075f8ce3791
6
+ metadata.gz: 1a761f71b68f236f70acdd525322e99a1f0e7bb304fb22c164dd557a461963b9a62c85d017a509f7409c47ab8377f3475f4f0a248ff2d39e4ccd7eee4ffd60e0
7
+ data.tar.gz: 074fafcd53b409d3596af8d16bdf51ad2a8756c10d65974ec3a9365537d7fb6375ba6e163cc1938d99d18dbce1cd624e72cbf9b9bfcbc5f4ab09ac186d8f175e
@@ -1,6 +1,8 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.3
4
3
  - 2.0.0
5
- - 2.1.3
6
- - 2.2.0
4
+ - 2.1.8
5
+ - 2.2.4
6
+ - 2.3.0
7
+ notifications:
8
+ email: false
data/README.md CHANGED
@@ -50,50 +50,35 @@ p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> "41962e71"
50
50
  - You can try *all* MurmurHash algorithms
51
51
  - You can use the same interface as the built-in Digest::{MD5,SHA1,...} classes.
52
52
 
53
- ## Simple benchmark
54
-
55
- ```ruby
56
- #! /usr/bin/env ruby
57
-
58
- require 'benchmark'
59
- require 'digest/md5'
60
- require 'digest/sha1'
61
- require 'digest/murmurhash'
62
- require 'digest/siphash'
63
-
64
- include Digest
65
-
66
- n = 1000
67
- str = "teststrings" * 1024 * 10
68
-
69
- Benchmark.bm do |f|
70
- [MurmurHash1, MurmurHash2, MurmurHash2A, MurmurHash64A, MurmurHash64B, MurmurHashNeutral2, MurmurHashAligned2, MurmurHash3_x86_32, MurmurHash3_x86_128, MurmurHash3_x64_128, SipHash, MD5, SHA1].each do |klass|
71
- f.report(klass.to_s) {
72
- i = 0
73
- while i < n
74
- klass.digest(str)
75
- i += 1
76
- end
77
- }
78
- end
79
- end
80
- ```
53
+ ## Benchmark
81
54
 
82
55
  ```
83
- user system total real
84
- Digest::MurmurHash1 0.050000 0.010000 0.060000 ( 0.047889)
85
- Digest::MurmurHash2 0.030000 0.000000 0.030000 ( 0.034564)
86
- Digest::MurmurHash2A 0.030000 0.010000 0.040000 ( 0.031808)
87
- Digest::MurmurHash64A 0.010000 0.000000 0.010000 ( 0.018400)
88
- Digest::MurmurHash64B 0.030000 0.000000 0.030000 ( 0.027818)
89
- Digest::MurmurHashNeutral2 0.040000 0.000000 0.040000 ( 0.041021)
90
- Digest::MurmurHashAligned2 0.020000 0.010000 0.030000 ( 0.030409)
91
- Digest::MurmurHash3_x86_32 0.130000 0.010000 0.140000 ( 0.139622)
92
- Digest::MurmurHash3_x86_128 0.120000 0.020000 0.140000 ( 0.143768)
93
- Digest::MurmurHash3_x64_128 0.070000 0.010000 0.080000 ( 0.072687)
94
- Digest::SipHash 0.060000 0.010000 0.070000 ( 0.068243)
95
- Digest::MD5 0.130000 0.010000 0.140000 ( 0.153793)
96
- Digest::SHA1 0.130000 0.020000 0.150000 ( 0.137686)
56
+ $ bundle ex ruby spec/bench.rb
57
+ ### condition
58
+
59
+ RUBY_VERSION = 2.2.2
60
+ count = 100
61
+ data size = 1024 KB
62
+
63
+ ### benchmark
64
+
65
+ MurmurHash1 0.05145844700746238
66
+ MurmurHash2 0.03399810096016154
67
+ MurmurHash2A 0.032202123024035245
68
+ MurmurHash64A 0.01880742999492213
69
+ MurmurHash64B 0.02227930899243802
70
+ MurmurHashNeutral2 0.04134096298366785
71
+ MurmurHashAligned2 0.03514020598959178
72
+ MurmurHash3_x86_32 0.1174831380485557
73
+ MurmurHash3_x86_128 0.1244338889955543
74
+ MurmurHash3_x64_128 0.07122775202151388
75
+ Digest::MD5 0.1401238819817081
76
+ Digest::SHA1 0.09980463003739715
77
+ Digest::SHA256 0.2248609460075386
78
+ Digest::SHA2 0.2245309569989331
79
+ OpenSSL::HMAC(sha256) 0.23656713595846668
80
+ Base64 0.1765239799860865
81
+ Zlib.crc32 0.009485946036875248
97
82
  ```
98
83
 
99
84
  ## Installation
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "digest-murmurhash"
5
- spec.version = "1.1.0"
5
+ spec.version = "1.1.1"
6
6
  spec.authors = ["ksss"]
7
7
  spec.email = ["co000ri@gmail.com"]
8
8
  spec.description = %q{digest-murmurhash is class collections of use algorithm MurmurHash desiged by Austin Appleby.}
@@ -45,7 +45,7 @@ VALUE
45
45
  murmur1_finish(VALUE self)
46
46
  {
47
47
  uint8_t digest[4];
48
- uint64_t h;
48
+ uint32_t h;
49
49
 
50
50
  h = _murmur_finish32(self, murmur_hash_process1);
51
51
  assign_by_endian_32(digest, h);
@@ -44,7 +44,7 @@ VALUE
44
44
  murmur2_finish(VALUE self)
45
45
  {
46
46
  uint8_t digest[4];
47
- uint64_t h;
47
+ uint32_t h;
48
48
 
49
49
  h = _murmur_finish32(self, murmur_hash_process2);
50
50
  assign_by_endian_32(digest, h);
@@ -45,7 +45,7 @@ VALUE
45
45
  murmur2a_finish(VALUE self)
46
46
  {
47
47
  uint8_t digest[4];
48
- uint64_t h;
48
+ uint32_t h;
49
49
 
50
50
  h = _murmur_finish32(self, murmur_hash_process2a);
51
51
  assign_by_endian_32(digest, h);
@@ -16,7 +16,7 @@ murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
16
16
 
17
17
  uint32_t h = seed ^ len;
18
18
 
19
- int align = (uint64_t)data & 3;
19
+ uint32_t align = (uint64_t)data & 3;
20
20
 
21
21
  if(align && (len >= 4))
22
22
  {
@@ -150,7 +150,7 @@ VALUE
150
150
  murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
151
151
  {
152
152
  uint8_t digest[4];
153
- uint64_t h;
153
+ uint32_t h;
154
154
  h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
155
155
  assign_by_endian_32(digest, h);
156
156
  return rb_str_new((const char*) digest, 4);
@@ -111,13 +111,22 @@ assign_by_endian_128(uint8_t *digest, void *out)
111
111
  }
112
112
  }
113
113
 
114
+ static uint32_t
115
+ rstring2uint32_t(VALUE str)
116
+ {
117
+ long len = RSTRING_LEN(str);
118
+ if (UINT32_MAX < len) {
119
+ rb_raise(rb_eRangeError, "String length=%ld will overflow from long to uint32_t", len);
120
+ }
121
+ return (uint32_t)len;
122
+ }
114
123
 
115
124
  uint32_t
116
125
  _murmur_finish32(VALUE self, uint32_t (*process)(const char*, uint32_t, uint32_t))
117
126
  {
118
127
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
119
128
  VALUE buffer = rb_ivar_get(self, iv_buffer);
120
- return process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint32_t*)seed);
129
+ return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed);
121
130
  }
122
131
 
123
132
  uint64_t
@@ -125,7 +134,7 @@ _murmur_finish64(VALUE self, uint64_t (*process)(const char*, uint32_t, uint64_t
125
134
  {
126
135
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
127
136
  VALUE buffer = rb_ivar_get(self, iv_buffer);
128
- return process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint64_t*)seed);
137
+ return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint64_t*)seed);
129
138
  }
130
139
 
131
140
  void
@@ -133,7 +142,7 @@ _murmur_finish128(VALUE self, void *out, void (*process)(const char*, uint32_t,
133
142
  {
134
143
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
135
144
  VALUE buffer = rb_ivar_get(self, iv_buffer);
136
- process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint32_t*)seed, out);
145
+ process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed, out);
137
146
  }
138
147
 
139
148
  uint32_t
@@ -159,7 +168,7 @@ _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const
159
168
  seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
160
169
  }
161
170
 
162
- return process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint32_t*)seed);
171
+ return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed);
163
172
  }
164
173
 
165
174
  uint64_t
@@ -185,7 +194,7 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
185
194
  seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
186
195
  }
187
196
 
188
- return process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint64_t*)seed);
197
+ return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint64_t*)seed);
189
198
  }
190
199
 
191
200
  void
@@ -212,7 +221,7 @@ _murmur_s_digest128(int argc, VALUE *argv, VALUE klass, void *out, void (*proces
212
221
  seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
213
222
  }
214
223
 
215
- process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint32_t*)seed, out);
224
+ process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed, out);
216
225
  }
217
226
 
218
227
 
@@ -62,7 +62,7 @@ VALUE
62
62
  murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
63
63
  {
64
64
  uint8_t digest[4];
65
- uint64_t h;
65
+ uint32_t h;
66
66
  h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
67
67
  assign_by_endian_32(digest, h);
68
68
  return rb_str_new((const char*) digest, 4);
@@ -17,7 +17,7 @@ module Digest
17
17
  }.each do |name, s|
18
18
  class_eval %Q{
19
19
  class MurmurHash#{name} < Digest::Class
20
- DEFAULT_SEED = "#{"\x00" * s.seed_length}".encode('ASCII-8BIT')
20
+ DEFAULT_SEED = "#{"\x00" * s.seed_length}".b
21
21
 
22
22
  def initialize
23
23
  @buffer = ""
@@ -25,7 +25,7 @@ module Digest
25
25
  end
26
26
 
27
27
  def update(str)
28
- @buffer += str
28
+ @buffer << str
29
29
  self
30
30
  end
31
31
  alias << update
@@ -41,7 +41,7 @@ module Digest
41
41
  end
42
42
 
43
43
  def seed=(s)
44
- fail ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
44
+ raise ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
45
45
  @seed = s
46
46
  end
47
47
 
@@ -4,6 +4,10 @@ lib = File.expand_path('../../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'digest/murmurhash'
7
+ require 'digest/md5'
8
+ require 'digest/sha1'
9
+ require 'openssl'
10
+ require 'zlib'
7
11
  require 'benchmark'
8
12
 
9
13
  @rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
@@ -15,53 +19,6 @@ def rand_str length
15
19
  rand
16
20
  end
17
21
 
18
- class Integer
19
- def to_32
20
- self & 0xffffffff
21
- end
22
- end
23
-
24
- def murmur_hash str, seed
25
- data = str.dup.unpack("C*")
26
- m = 0x5bd1e995
27
- r = 16
28
- length = str.bytesize
29
- h = (seed ^ (length * m).to_32).to_32
30
-
31
- while 4 <= length
32
- d = data.shift(4).pack("C*").unpack("I")[0]
33
- h = (h + d).to_32
34
- h = (h * m).to_32
35
- h ^= h >> r
36
- length -= 4
37
- end
38
-
39
- if 2 < length
40
- h = (h + (data[2] << 16).to_32).to_32
41
- end
42
- if 1 < length
43
- h = (h + (data[1] << 8).to_32).to_32
44
- end
45
- if 0 < length
46
- h = (h + data[0]).to_32
47
- h = (h * m).to_32
48
- h ^= h >> r
49
- end
50
-
51
- h = (h * m).to_32
52
- h ^= h >> 10
53
- h = (h * m).to_32
54
- h ^= h >> 17
55
-
56
- h
57
- end
58
-
59
- n = 100000
60
-
61
- a = Array.new(n, 0)
62
- n.times do |i|
63
- a[i] = rand_str 20
64
- end
65
22
  seed = rand(2**32)
66
23
  seed_str32 = [seed].pack("L")
67
24
  seed_str64 = [seed].pack("Q")
@@ -69,7 +26,6 @@ c = Struct.new "Cases",
69
26
  :name,
70
27
  :func
71
28
  cases = [
72
- c.new("pureRuby", proc{|x| murmur_hash x, seed }),
73
29
  c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.digest x, seed_str32 }),
74
30
  c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.digest x, seed_str32 }),
75
31
  c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.digest x, seed_str32 }),
@@ -80,53 +36,60 @@ cases = [
80
36
  c.new("MurmurHash3_x86_32", proc{|x| Digest::MurmurHash3_x86_32.digest x, seed_str32 }),
81
37
  c.new("MurmurHash3_x86_128", proc{|x| Digest::MurmurHash3_x86_128.digest x, seed_str32 }),
82
38
  c.new("MurmurHash3_x64_128", proc{|x| Digest::MurmurHash3_x64_128.digest x, seed_str32 }),
39
+ c.new("Digest::MD5", proc{|x| Digest::MD5.digest x }),
40
+ c.new("Digest::SHA1", proc{|x| Digest::SHA1.digest x }),
41
+ c.new("Digest::SHA256", proc{|x| Digest::SHA256.digest x }),
42
+ c.new("Digest::SHA2", proc{|x| Digest::SHA2.digest x }),
43
+ c.new("OpenSSL::HMAC(sha256)", proc{|x| OpenSSL::HMAC.digest "sha256", seed_str32, x }),
44
+ c.new("Base64", proc{|x| [x].pack("m0") }),
45
+ c.new("Zlib.crc32", proc{|x| Zlib.crc32(x) }),
83
46
  ]
84
- reals = {}
47
+
85
48
  confrict = {}
86
49
  confricts = {}
87
50
 
51
+ n = 100
52
+ a = Array.new(n, 0)
53
+ n.times do |i|
54
+ a[i] = rand_str(1024*1024)
55
+ end
56
+
88
57
  puts "### condition"
89
58
  puts
90
59
  puts " RUBY_VERSION = #{RUBY_VERSION}"
91
60
  puts " count = #{n}"
61
+ puts " data size = #{a[0].length / 1024} KB"
92
62
  puts
93
63
  puts "### benchmark"
94
64
  puts
95
65
  puts "```"
96
- Benchmark.bm do |x|
97
- cases.each do |c|
98
- i = 0
99
- z = x.report c.name do
100
- while i < n
101
- c.func.call(a[i])
102
- i += 1
103
- end
104
- end
105
-
106
- confrict.clear
107
66
 
108
- i = 0
67
+ GC.start
68
+ cases.each do |c|
69
+ i = 0
70
+ b = Benchmark.realtime do
109
71
  while i < n
110
- rethash = c.func.call(a[i])
111
- if confrict[rethash].nil?
112
- confrict[rethash] = 0
113
- else
114
- confrict[rethash] += 1
115
- end
72
+ c.func.call(a[i])
116
73
  i += 1
117
74
  end
118
- reals[c.name] = z.real
119
- confricts[c.name] = confrict.count{|hash, count| 0 < count}
120
75
  end
121
- end
122
- puts "```"
76
+ puts "#{c.name}\t#{b}"
123
77
 
124
- puts
125
- puts "### real second rate (pureRuby/)"
126
- puts
127
- reals.each do |name, real|
128
- puts " " + (reals["pureRuby"] / real).to_s + "/" + name
78
+ confrict.clear
79
+
80
+ i = 0
81
+ while i < n
82
+ rethash = c.func.call(a[i])
83
+ if confrict[rethash].nil?
84
+ confrict[rethash] = 0
85
+ else
86
+ confrict[rethash] += 1
87
+ end
88
+ i += 1
89
+ end
90
+ confricts[c.name] = confrict.count{|hash, count| 0 < count}
129
91
  end
92
+ puts "```"
130
93
 
131
94
  puts
132
95
  puts "### confrict count (/#{n})"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-murmurhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - ksss
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-16 00:00:00.000000000 Z
11
+ date: 2016-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
122
  version: '0'
123
123
  requirements: []
124
124
  rubyforge_project:
125
- rubygems_version: 2.4.1
125
+ rubygems_version: 2.5.1
126
126
  signing_key:
127
127
  specification_version: 4
128
128
  summary: digest-murmurhash is class collections of use algorithm MurmurHash desiged