digest-murmurhash 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cd9a5f608b1cac914b791b811bc75eba224ca841
4
- data.tar.gz: ed6533e449b132c385c3457708c52d556d47114d
3
+ metadata.gz: b61d7d502181711a8b3b63e9b5609274e56c3b27
4
+ data.tar.gz: 66e152971a285ad197563d14dd480a8e03b5095a
5
5
  SHA512:
6
- metadata.gz: 54555dc73a82614b6fe9ba109662e96d4dad18b8e9ed30a073d9cde49d611b256158381d9c61280a9dbe7783e8db4967ccc8de2af9a2a507c71bdca87c99730b
7
- data.tar.gz: 4b47b42486c884ddb3edee9b7c4f1ec11529657baf471492e45c113abc6e7dba622a5e7e7cc664480c9790b139fe49942e091c3257f0134635316075f8ce3791
6
+ metadata.gz: 1a761f71b68f236f70acdd525322e99a1f0e7bb304fb22c164dd557a461963b9a62c85d017a509f7409c47ab8377f3475f4f0a248ff2d39e4ccd7eee4ffd60e0
7
+ data.tar.gz: 074fafcd53b409d3596af8d16bdf51ad2a8756c10d65974ec3a9365537d7fb6375ba6e163cc1938d99d18dbce1cd624e72cbf9b9bfcbc5f4ab09ac186d8f175e
@@ -1,6 +1,8 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 1.9.3
4
3
  - 2.0.0
5
- - 2.1.3
6
- - 2.2.0
4
+ - 2.1.8
5
+ - 2.2.4
6
+ - 2.3.0
7
+ notifications:
8
+ email: false
data/README.md CHANGED
@@ -50,50 +50,35 @@ p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> "41962e71"
50
50
  - You can try *all* MurmurHash algorithms
51
51
  - You can use the same interface as the built-in Digest::{MD5,SHA1,...} classes.
52
52
 
53
- ## Simple benchmark
54
-
55
- ```ruby
56
- #! /usr/bin/env ruby
57
-
58
- require 'benchmark'
59
- require 'digest/md5'
60
- require 'digest/sha1'
61
- require 'digest/murmurhash'
62
- require 'digest/siphash'
63
-
64
- include Digest
65
-
66
- n = 1000
67
- str = "teststrings" * 1024 * 10
68
-
69
- Benchmark.bm do |f|
70
- [MurmurHash1, MurmurHash2, MurmurHash2A, MurmurHash64A, MurmurHash64B, MurmurHashNeutral2, MurmurHashAligned2, MurmurHash3_x86_32, MurmurHash3_x86_128, MurmurHash3_x64_128, SipHash, MD5, SHA1].each do |klass|
71
- f.report(klass.to_s) {
72
- i = 0
73
- while i < n
74
- klass.digest(str)
75
- i += 1
76
- end
77
- }
78
- end
79
- end
80
- ```
53
+ ## Benchmark
81
54
 
82
55
  ```
83
- user system total real
84
- Digest::MurmurHash1 0.050000 0.010000 0.060000 ( 0.047889)
85
- Digest::MurmurHash2 0.030000 0.000000 0.030000 ( 0.034564)
86
- Digest::MurmurHash2A 0.030000 0.010000 0.040000 ( 0.031808)
87
- Digest::MurmurHash64A 0.010000 0.000000 0.010000 ( 0.018400)
88
- Digest::MurmurHash64B 0.030000 0.000000 0.030000 ( 0.027818)
89
- Digest::MurmurHashNeutral2 0.040000 0.000000 0.040000 ( 0.041021)
90
- Digest::MurmurHashAligned2 0.020000 0.010000 0.030000 ( 0.030409)
91
- Digest::MurmurHash3_x86_32 0.130000 0.010000 0.140000 ( 0.139622)
92
- Digest::MurmurHash3_x86_128 0.120000 0.020000 0.140000 ( 0.143768)
93
- Digest::MurmurHash3_x64_128 0.070000 0.010000 0.080000 ( 0.072687)
94
- Digest::SipHash 0.060000 0.010000 0.070000 ( 0.068243)
95
- Digest::MD5 0.130000 0.010000 0.140000 ( 0.153793)
96
- Digest::SHA1 0.130000 0.020000 0.150000 ( 0.137686)
56
+ $ bundle ex ruby spec/bench.rb
57
+ ### condition
58
+
59
+ RUBY_VERSION = 2.2.2
60
+ count = 100
61
+ data size = 1024 KB
62
+
63
+ ### benchmark
64
+
65
+ MurmurHash1 0.05145844700746238
66
+ MurmurHash2 0.03399810096016154
67
+ MurmurHash2A 0.032202123024035245
68
+ MurmurHash64A 0.01880742999492213
69
+ MurmurHash64B 0.02227930899243802
70
+ MurmurHashNeutral2 0.04134096298366785
71
+ MurmurHashAligned2 0.03514020598959178
72
+ MurmurHash3_x86_32 0.1174831380485557
73
+ MurmurHash3_x86_128 0.1244338889955543
74
+ MurmurHash3_x64_128 0.07122775202151388
75
+ Digest::MD5 0.1401238819817081
76
+ Digest::SHA1 0.09980463003739715
77
+ Digest::SHA256 0.2248609460075386
78
+ Digest::SHA2 0.2245309569989331
79
+ OpenSSL::HMAC(sha256) 0.23656713595846668
80
+ Base64 0.1765239799860865
81
+ Zlib.crc32 0.009485946036875248
97
82
  ```
98
83
 
99
84
  ## Installation
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |spec|
4
4
  spec.name = "digest-murmurhash"
5
- spec.version = "1.1.0"
5
+ spec.version = "1.1.1"
6
6
  spec.authors = ["ksss"]
7
7
  spec.email = ["co000ri@gmail.com"]
8
8
  spec.description = %q{digest-murmurhash is class collections of use algorithm MurmurHash desiged by Austin Appleby.}
@@ -45,7 +45,7 @@ VALUE
45
45
  murmur1_finish(VALUE self)
46
46
  {
47
47
  uint8_t digest[4];
48
- uint64_t h;
48
+ uint32_t h;
49
49
 
50
50
  h = _murmur_finish32(self, murmur_hash_process1);
51
51
  assign_by_endian_32(digest, h);
@@ -44,7 +44,7 @@ VALUE
44
44
  murmur2_finish(VALUE self)
45
45
  {
46
46
  uint8_t digest[4];
47
- uint64_t h;
47
+ uint32_t h;
48
48
 
49
49
  h = _murmur_finish32(self, murmur_hash_process2);
50
50
  assign_by_endian_32(digest, h);
@@ -45,7 +45,7 @@ VALUE
45
45
  murmur2a_finish(VALUE self)
46
46
  {
47
47
  uint8_t digest[4];
48
- uint64_t h;
48
+ uint32_t h;
49
49
 
50
50
  h = _murmur_finish32(self, murmur_hash_process2a);
51
51
  assign_by_endian_32(digest, h);
@@ -16,7 +16,7 @@ murmur_hash_process_aligned2(const char * key, uint32_t len, uint32_t seed)
16
16
 
17
17
  uint32_t h = seed ^ len;
18
18
 
19
- int align = (uint64_t)data & 3;
19
+ uint32_t align = (uint64_t)data & 3;
20
20
 
21
21
  if(align && (len >= 4))
22
22
  {
@@ -150,7 +150,7 @@ VALUE
150
150
  murmur_aligned2_s_digest(int argc, VALUE *argv, VALUE klass)
151
151
  {
152
152
  uint8_t digest[4];
153
- uint64_t h;
153
+ uint32_t h;
154
154
  h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_aligned2);
155
155
  assign_by_endian_32(digest, h);
156
156
  return rb_str_new((const char*) digest, 4);
@@ -111,13 +111,22 @@ assign_by_endian_128(uint8_t *digest, void *out)
111
111
  }
112
112
  }
113
113
 
114
+ static uint32_t
115
+ rstring2uint32_t(VALUE str)
116
+ {
117
+ long len = RSTRING_LEN(str);
118
+ if (UINT32_MAX < len) {
119
+ rb_raise(rb_eRangeError, "String length=%ld will overflow from long to uint32_t", len);
120
+ }
121
+ return (uint32_t)len;
122
+ }
114
123
 
115
124
  uint32_t
116
125
  _murmur_finish32(VALUE self, uint32_t (*process)(const char*, uint32_t, uint32_t))
117
126
  {
118
127
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
119
128
  VALUE buffer = rb_ivar_get(self, iv_buffer);
120
- return process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint32_t*)seed);
129
+ return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed);
121
130
  }
122
131
 
123
132
  uint64_t
@@ -125,7 +134,7 @@ _murmur_finish64(VALUE self, uint64_t (*process)(const char*, uint32_t, uint64_t
125
134
  {
126
135
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
127
136
  VALUE buffer = rb_ivar_get(self, iv_buffer);
128
- return process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint64_t*)seed);
137
+ return process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint64_t*)seed);
129
138
  }
130
139
 
131
140
  void
@@ -133,7 +142,7 @@ _murmur_finish128(VALUE self, void *out, void (*process)(const char*, uint32_t,
133
142
  {
134
143
  const char *seed = RSTRING_PTR(rb_ivar_get(self, iv_seed));
135
144
  VALUE buffer = rb_ivar_get(self, iv_buffer);
136
- process(RSTRING_PTR(buffer), RSTRING_LEN(buffer), *(uint32_t*)seed, out);
145
+ process(RSTRING_PTR(buffer), rstring2uint32_t(buffer), *(uint32_t*)seed, out);
137
146
  }
138
147
 
139
148
  uint32_t
@@ -159,7 +168,7 @@ _murmur_s_digest32(int argc, VALUE *argv, VALUE klass, uint32_t (*process)(const
159
168
  seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
160
169
  }
161
170
 
162
- return process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint32_t*)seed);
171
+ return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed);
163
172
  }
164
173
 
165
174
  uint64_t
@@ -185,7 +194,7 @@ _murmur_s_digest64(int argc, VALUE *argv, VALUE klass, uint64_t (*process)(const
185
194
  seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
186
195
  }
187
196
 
188
- return process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint64_t*)seed);
197
+ return process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint64_t*)seed);
189
198
  }
190
199
 
191
200
  void
@@ -212,7 +221,7 @@ _murmur_s_digest128(int argc, VALUE *argv, VALUE klass, void *out, void (*proces
212
221
  seed = RSTRING_PTR(rb_const_get(klass, id_DEFAULT_SEED));
213
222
  }
214
223
 
215
- process(RSTRING_PTR(str), RSTRING_LEN(str), *(uint32_t*)seed, out);
224
+ process(RSTRING_PTR(str), rstring2uint32_t(str), *(uint32_t*)seed, out);
216
225
  }
217
226
 
218
227
 
@@ -62,7 +62,7 @@ VALUE
62
62
  murmur_neutral2_s_digest(int argc, VALUE *argv, VALUE klass)
63
63
  {
64
64
  uint8_t digest[4];
65
- uint64_t h;
65
+ uint32_t h;
66
66
  h = _murmur_s_digest32(argc, argv, klass, murmur_hash_process_neutral2);
67
67
  assign_by_endian_32(digest, h);
68
68
  return rb_str_new((const char*) digest, 4);
@@ -17,7 +17,7 @@ module Digest
17
17
  }.each do |name, s|
18
18
  class_eval %Q{
19
19
  class MurmurHash#{name} < Digest::Class
20
- DEFAULT_SEED = "#{"\x00" * s.seed_length}".encode('ASCII-8BIT')
20
+ DEFAULT_SEED = "#{"\x00" * s.seed_length}".b
21
21
 
22
22
  def initialize
23
23
  @buffer = ""
@@ -25,7 +25,7 @@ module Digest
25
25
  end
26
26
 
27
27
  def update(str)
28
- @buffer += str
28
+ @buffer << str
29
29
  self
30
30
  end
31
31
  alias << update
@@ -41,7 +41,7 @@ module Digest
41
41
  end
42
42
 
43
43
  def seed=(s)
44
- fail ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
44
+ raise ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
45
45
  @seed = s
46
46
  end
47
47
 
@@ -4,6 +4,10 @@ lib = File.expand_path('../../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'digest/murmurhash'
7
+ require 'digest/md5'
8
+ require 'digest/sha1'
9
+ require 'openssl'
10
+ require 'zlib'
7
11
  require 'benchmark'
8
12
 
9
13
  @rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
@@ -15,53 +19,6 @@ def rand_str length
15
19
  rand
16
20
  end
17
21
 
18
- class Integer
19
- def to_32
20
- self & 0xffffffff
21
- end
22
- end
23
-
24
- def murmur_hash str, seed
25
- data = str.dup.unpack("C*")
26
- m = 0x5bd1e995
27
- r = 16
28
- length = str.bytesize
29
- h = (seed ^ (length * m).to_32).to_32
30
-
31
- while 4 <= length
32
- d = data.shift(4).pack("C*").unpack("I")[0]
33
- h = (h + d).to_32
34
- h = (h * m).to_32
35
- h ^= h >> r
36
- length -= 4
37
- end
38
-
39
- if 2 < length
40
- h = (h + (data[2] << 16).to_32).to_32
41
- end
42
- if 1 < length
43
- h = (h + (data[1] << 8).to_32).to_32
44
- end
45
- if 0 < length
46
- h = (h + data[0]).to_32
47
- h = (h * m).to_32
48
- h ^= h >> r
49
- end
50
-
51
- h = (h * m).to_32
52
- h ^= h >> 10
53
- h = (h * m).to_32
54
- h ^= h >> 17
55
-
56
- h
57
- end
58
-
59
- n = 100000
60
-
61
- a = Array.new(n, 0)
62
- n.times do |i|
63
- a[i] = rand_str 20
64
- end
65
22
  seed = rand(2**32)
66
23
  seed_str32 = [seed].pack("L")
67
24
  seed_str64 = [seed].pack("Q")
@@ -69,7 +26,6 @@ c = Struct.new "Cases",
69
26
  :name,
70
27
  :func
71
28
  cases = [
72
- c.new("pureRuby", proc{|x| murmur_hash x, seed }),
73
29
  c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.digest x, seed_str32 }),
74
30
  c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.digest x, seed_str32 }),
75
31
  c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.digest x, seed_str32 }),
@@ -80,53 +36,60 @@ cases = [
80
36
  c.new("MurmurHash3_x86_32", proc{|x| Digest::MurmurHash3_x86_32.digest x, seed_str32 }),
81
37
  c.new("MurmurHash3_x86_128", proc{|x| Digest::MurmurHash3_x86_128.digest x, seed_str32 }),
82
38
  c.new("MurmurHash3_x64_128", proc{|x| Digest::MurmurHash3_x64_128.digest x, seed_str32 }),
39
+ c.new("Digest::MD5", proc{|x| Digest::MD5.digest x }),
40
+ c.new("Digest::SHA1", proc{|x| Digest::SHA1.digest x }),
41
+ c.new("Digest::SHA256", proc{|x| Digest::SHA256.digest x }),
42
+ c.new("Digest::SHA2", proc{|x| Digest::SHA2.digest x }),
43
+ c.new("OpenSSL::HMAC(sha256)", proc{|x| OpenSSL::HMAC.digest "sha256", seed_str32, x }),
44
+ c.new("Base64", proc{|x| [x].pack("m0") }),
45
+ c.new("Zlib.crc32", proc{|x| Zlib.crc32(x) }),
83
46
  ]
84
- reals = {}
47
+
85
48
  confrict = {}
86
49
  confricts = {}
87
50
 
51
+ n = 100
52
+ a = Array.new(n, 0)
53
+ n.times do |i|
54
+ a[i] = rand_str(1024*1024)
55
+ end
56
+
88
57
  puts "### condition"
89
58
  puts
90
59
  puts " RUBY_VERSION = #{RUBY_VERSION}"
91
60
  puts " count = #{n}"
61
+ puts " data size = #{a[0].length / 1024} KB"
92
62
  puts
93
63
  puts "### benchmark"
94
64
  puts
95
65
  puts "```"
96
- Benchmark.bm do |x|
97
- cases.each do |c|
98
- i = 0
99
- z = x.report c.name do
100
- while i < n
101
- c.func.call(a[i])
102
- i += 1
103
- end
104
- end
105
-
106
- confrict.clear
107
66
 
108
- i = 0
67
+ GC.start
68
+ cases.each do |c|
69
+ i = 0
70
+ b = Benchmark.realtime do
109
71
  while i < n
110
- rethash = c.func.call(a[i])
111
- if confrict[rethash].nil?
112
- confrict[rethash] = 0
113
- else
114
- confrict[rethash] += 1
115
- end
72
+ c.func.call(a[i])
116
73
  i += 1
117
74
  end
118
- reals[c.name] = z.real
119
- confricts[c.name] = confrict.count{|hash, count| 0 < count}
120
75
  end
121
- end
122
- puts "```"
76
+ puts "#{c.name}\t#{b}"
123
77
 
124
- puts
125
- puts "### real second rate (pureRuby/)"
126
- puts
127
- reals.each do |name, real|
128
- puts " " + (reals["pureRuby"] / real).to_s + "/" + name
78
+ confrict.clear
79
+
80
+ i = 0
81
+ while i < n
82
+ rethash = c.func.call(a[i])
83
+ if confrict[rethash].nil?
84
+ confrict[rethash] = 0
85
+ else
86
+ confrict[rethash] += 1
87
+ end
88
+ i += 1
89
+ end
90
+ confricts[c.name] = confrict.count{|hash, count| 0 < count}
129
91
  end
92
+ puts "```"
130
93
 
131
94
  puts
132
95
  puts "### confrict count (/#{n})"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-murmurhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - ksss
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-10-16 00:00:00.000000000 Z
11
+ date: 2016-01-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -122,7 +122,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
122
122
  version: '0'
123
123
  requirements: []
124
124
  rubyforge_project:
125
- rubygems_version: 2.4.1
125
+ rubygems_version: 2.5.1
126
126
  signing_key:
127
127
  specification_version: 4
128
128
  summary: digest-murmurhash is class collections of use algorithm MurmurHash desiged