digest-murmurhash 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +4 -4
  2. data/README.md +66 -26
  3. data/Rakefile +2 -2
  4. data/digest-murmurhash.gemspec +6 -7
  5. data/ext/digest/murmurhash/{murmurhash1.c → 1.c} +1 -1
  6. data/ext/digest/murmurhash/{murmurhash2.c → 2.c} +1 -1
  7. data/ext/digest/murmurhash/{murmurhash2a.c → 2a.c} +1 -1
  8. data/ext/digest/murmurhash/3_x64_128.c +117 -0
  9. data/ext/digest/murmurhash/3_x86_128.c +141 -0
  10. data/ext/digest/murmurhash/3_x86_32.c +88 -0
  11. data/ext/digest/murmurhash/{murmurhash64a.c → 64a.c} +1 -1
  12. data/ext/digest/murmurhash/{murmurhash64b.c → 64b.c} +2 -2
  13. data/ext/digest/murmurhash/aligned2.c +163 -0
  14. data/ext/digest/murmurhash/extconf.rb +1 -1
  15. data/ext/digest/murmurhash/{murmurhash.c → init.c} +136 -22
  16. data/ext/digest/murmurhash/init.h +94 -0
  17. data/ext/digest/murmurhash/{murmurhash_neutral2.c → neutral2.c} +3 -3
  18. data/lib/digest/murmurhash.rb +3 -73
  19. data/lib/digest/murmurhash/base.rb +58 -0
  20. data/spec/bench.rb +18 -12
  21. data/spec/digest_spec.rb +41 -29
  22. data/spec/exception_spec.rb +17 -12
  23. data/spec/mem_spec.rb +1 -1
  24. data/spec/spec_helper.rb +6 -8
  25. metadata +20 -36
  26. data/ext/digest/murmurhash/murmurhash.h +0 -46
  27. data/ext/digest/murmurhash/murmurhash1.h +0 -11
  28. data/ext/digest/murmurhash/murmurhash2.h +0 -11
  29. data/ext/digest/murmurhash/murmurhash2a.h +0 -11
  30. data/ext/digest/murmurhash/murmurhash64a.h +0 -10
  31. data/ext/digest/murmurhash/murmurhash64b.h +0 -11
  32. data/ext/digest/murmurhash/murmurhash_aligned2.c +0 -75
  33. data/ext/digest/murmurhash/murmurhash_aligned2.h +0 -11
  34. data/ext/digest/murmurhash/murmurhash_neutral2.h +0 -11
@@ -0,0 +1,94 @@
1
+ #ifndef MURMURHASH_INCLUDED
2
+ # define MURMURHASH_INCLUDED
3
+
4
+ #include "ruby.h"
5
+
6
+ // Microsoft Visual Studio
7
+
8
+ #if defined(_MSC_VER)
9
+ #define FORCE_INLINE __forceinline
10
+ #include <stdlib.h>
11
+ #define ROTL32(x,y) _rotl(x,y)
12
+ #define ROTL64(x,y) _rotl64(x,y)
13
+ #define BIG_CONSTANT(x) (x)
14
+ #else // defined(_MSC_VER)
15
+ #define FORCE_INLINE inline __attribute__((always_inline))
16
+ #define ROTL32(x,y) rotl32(x,y)
17
+ #define ROTL64(x,y) rotl64(x,y)
18
+ #define BIG_CONSTANT(x) (x##LLU)
19
+ #endif // !defined(_MSC_VER)
20
+
21
+ #ifdef DYNAMIC_ENDIAN
22
+ /* for universal binary of NEXTSTEP and MacOS X */
23
+ /* useless since autoconf 2.63? */
24
+ static int
25
+ is_bigendian(void)
26
+ {
27
+ static int init = 0;
28
+ static int endian_value;
29
+ char *p;
30
+
31
+ if (init) return endian_value;
32
+ init = 1;
33
+ p = (char*)&init;
34
+ return endian_value = p[0] ? 0 : 1;
35
+ }
36
+ # define BIGENDIAN_P() (is_bigendian())
37
+ #elif defined(WORDS_BIGENDIAN)
38
+ # define BIGENDIAN_P() 1
39
+ #else
40
+ # define BIGENDIAN_P() 0
41
+ #endif
42
+
43
+ #define MURMURHASH_MAGIC 0x5bd1e995
44
+ #define MURMURHASH_MAGIC64A BIG_CONSTANT(0xc6a4a7935bd1e995)
45
+
46
+ void assign_by_endian_32(uint8_t *digest, uint32_t h);
47
+ void assign_by_endian_64(uint8_t *digest, uint64_t h);
48
+ void assign_by_endian_128(uint8_t*, void*);
49
+
50
+ uint32_t rotl32(uint32_t, int8_t);
51
+ uint64_t rotl64(uint64_t, int8_t);
52
+ uint32_t getblock32(const uint32_t*, int);
53
+ uint64_t getblock64(const uint64_t*, int);
54
+ uint32_t fmix32(uint32_t);
55
+ uint64_t fmix64(uint64_t);
56
+ uint32_t _murmur_finish32(VALUE, uint32_t (*)(const char*, uint32_t, uint32_t));
57
+ uint64_t _murmur_finish64(VALUE, uint64_t (*)(const char*, uint32_t, uint64_t));
58
+ void _murmur_finish128(VALUE, void*, void (*)(const char*, uint32_t, uint32_t, void*));
59
+ uint32_t _murmur_s_digest32(int, VALUE*, VALUE, uint32_t (*)(const char*, uint32_t, uint32_t));
60
+ uint64_t _murmur_s_digest64(int, VALUE*, VALUE, uint64_t (*)(const char*, uint32_t, uint64_t));
61
+ void _murmur_s_digest128(int, VALUE*, VALUE, void*, void (*)(const char*, uint32_t, uint32_t, void*));
62
+
63
+ VALUE murmur1_finish(VALUE);
64
+ VALUE murmur1_s_digest(int, VALUE*, VALUE);
65
+ VALUE murmur1_s_rawdigest(int, VALUE*, VALUE);
66
+ VALUE murmur2_finish(VALUE);
67
+ VALUE murmur2_s_digest(int, VALUE*, VALUE);
68
+ VALUE murmur2_s_rawdigest(int, VALUE*, VALUE);
69
+ VALUE murmur2a_finish(VALUE);
70
+ VALUE murmur2a_s_digest(int, VALUE*, VALUE);
71
+ VALUE murmur2a_s_rawdigest(int, VALUE*, VALUE);
72
+ VALUE murmur64a_finish(VALUE);
73
+ VALUE murmur64a_s_digest(int, VALUE*, VALUE);
74
+ VALUE murmur64a_s_rawdigest(int, VALUE*, VALUE);
75
+ VALUE murmur64b_finish(VALUE);
76
+ VALUE murmur64b_s_digest(int, VALUE*, VALUE);
77
+ VALUE murmur64b_s_rawdigest(int, VALUE*, VALUE);
78
+ VALUE murmur_neutral2_finish(VALUE);
79
+ VALUE murmur_neutral2_s_digest(int, VALUE*, VALUE);
80
+ VALUE murmur_neutral2_s_rawdigest(int, VALUE*, VALUE);
81
+ VALUE murmur_aligned2_finish(VALUE);
82
+ VALUE murmur_aligned2_s_digest(int, VALUE*, VALUE);
83
+ VALUE murmur_aligned2_s_rawdigest(int, VALUE*, VALUE);
84
+ VALUE murmur3_x86_32_finish(VALUE);
85
+ VALUE murmur3_x86_32_s_digest(int, VALUE*, VALUE);
86
+ VALUE murmur3_x86_32_s_rawdigest(int, VALUE*, VALUE);
87
+ VALUE murmur3_x86_128_finish(VALUE);
88
+ VALUE murmur3_x86_128_s_digest(int, VALUE*, VALUE);
89
+ VALUE murmur3_x86_128_s_rawdigest(int, VALUE*, VALUE);
90
+ VALUE murmur3_x64_128_finish(VALUE);
91
+ VALUE murmur3_x64_128_s_digest(int, VALUE*, VALUE);
92
+ VALUE murmur3_x64_128_s_rawdigest(int, VALUE*, VALUE);
93
+
94
+ #endif /* ifndef MURMURHASH_INCLUDED */
@@ -2,7 +2,7 @@
2
2
  * MurmurHashNeutral2 (C) Austin Appleby
3
3
  */
4
4
 
5
- #include "murmurhash_neutral2.h"
5
+ #include "init.h"
6
6
 
7
7
  uint32_t
8
8
  murmur_hash_process_neutral2(const char * key, uint32_t len, uint32_t seed)
@@ -22,8 +22,8 @@ murmur_hash_process_neutral2(const char * key, uint32_t len, uint32_t seed)
22
22
  k |= data[2] << 16;
23
23
  k |= data[3] << 24;
24
24
 
25
- k *= m;
26
- k ^= k >> r;
25
+ k *= m;
26
+ k ^= k >> r;
27
27
  k *= m;
28
28
 
29
29
  h *= m;
@@ -1,73 +1,3 @@
1
- require "digest/simple"
2
-
3
- module Digest
4
- class MurmurHash < Simple
5
- def initialize
6
- @seed = self.class::DEFAULT_SEED
7
- super
8
- end
9
-
10
- def seed
11
- @seed
12
- end
13
-
14
- def seed=(s)
15
- fail ArgumentError, "seed string should #{digest_length * 16} bit chars" if s.bytesize != digest_length
16
- @seed = s
17
- end
18
-
19
- module Size32
20
- DEFAULT_SEED = "\x00\x00\x00\x00".encode('ASCII-8BIT')
21
-
22
- def digest_length
23
- 4
24
- end
25
-
26
- def to_i
27
- finish.unpack("I")[0]
28
- end
29
- end
30
-
31
- module Size64
32
- DEFAULT_SEED = "\x00\x00\x00\x00\x00\x00\x00\x00".encode('ASCII-8BIT')
33
-
34
- def digest_length
35
- 8
36
- end
37
-
38
- def to_i
39
- finish.unpack("L")[0]
40
- end
41
- end
42
- end
43
-
44
- class MurmurHash1 < MurmurHash
45
- include Size32
46
- end
47
-
48
- class MurmurHash2 < MurmurHash
49
- include Size32
50
- end
51
-
52
- class MurmurHash2A < MurmurHash
53
- include Size32
54
- end
55
-
56
- class MurmurHash64A < MurmurHash
57
- include Size64
58
- end
59
-
60
- class MurmurHash64B < MurmurHash
61
- include Size64
62
- end
63
-
64
- class MurmurHashAligned2 < MurmurHash
65
- include Size32
66
- end
67
-
68
- class MurmurHashNeutral2 < MurmurHash
69
- include Size32
70
- end
71
- end
72
-
73
- require "digest/murmurhash/murmurhash"
1
+ require "digest"
2
+ require "digest/murmurhash/base"
3
+ require "digest/murmurhash/ext"
@@ -0,0 +1,58 @@
1
+ module Digest
2
+ ds = Struct.new(:digest_length, :seed_length)
3
+ s1 = ds.new(4, 4)
4
+ s2 = ds.new(8, 8)
5
+ s3 = ds.new(16, 4)
6
+ {
7
+ '1' => s1,
8
+ '2' => s1,
9
+ '2A' => s1,
10
+ '64A' => s2,
11
+ '64B' => s2,
12
+ 'Aligned2' => s1,
13
+ 'Neutral2' => s1,
14
+ '3_x86_32' => s1,
15
+ '3_x86_128' => s3,
16
+ '3_x64_128' => s3,
17
+ }.each do |name, s|
18
+ class_eval %Q{
19
+ class MurmurHash#{name} < Digest::Class
20
+ DEFAULT_SEED = "#{"\x00" * s.seed_length}".encode('ASCII-8BIT')
21
+
22
+ def initialize
23
+ @buffer = ""
24
+ @seed = DEFAULT_SEED
25
+ end
26
+
27
+ def update(str)
28
+ @buffer += str
29
+ self
30
+ end
31
+ alias << update
32
+
33
+ def reset
34
+ @buffer.clear
35
+ @seed = DEFAULT_SEED
36
+ self
37
+ end
38
+
39
+ def seed
40
+ @seed
41
+ end
42
+
43
+ def seed=(s)
44
+ fail ArgumentError, "seed string should be #{s.seed_length} length" if #{s.seed_length} != s.length
45
+ @seed = s
46
+ end
47
+
48
+ def digest_length
49
+ #{s.digest_length}
50
+ end
51
+
52
+ def block_length
53
+ 0
54
+ end
55
+ end
56
+ }
57
+ end
58
+ end
@@ -3,7 +3,6 @@
3
3
  lib = File.expand_path('../../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
- require 'digest/stringbuffer'
7
6
  require 'digest/murmurhash'
8
7
  require 'benchmark'
9
8
 
@@ -58,7 +57,6 @@ def murmur_hash str, seed
58
57
  end
59
58
 
60
59
  n = 100000
61
- times_enum = n.times
62
60
 
63
61
  a = Array.new(n, 0)
64
62
  n.times do |i|
@@ -72,13 +70,16 @@ c = Struct.new "Cases",
72
70
  :func
73
71
  cases = [
74
72
  c.new("pureRuby", proc{|x| murmur_hash x, seed }),
75
- c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x, seed_str32 }),
76
- c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x, seed_str32 }),
77
- c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x, seed_str32 }),
78
- c.new("MurmurHash64A", proc{|x| Digest::MurmurHash64A.rawdigest x, seed_str64 }),
79
- c.new("MurmurHash64B", proc{|x| Digest::MurmurHash64B.rawdigest x, seed_str64 }),
80
- c.new("MurmurHashNeutral2", proc{|x| Digest::MurmurHashNeutral2.rawdigest x, seed_str32 }),
81
- c.new("MurmurHashAligned2", proc{|x| Digest::MurmurHashAligned2.rawdigest x, seed_str32 }),
73
+ c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.digest x, seed_str32 }),
74
+ c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.digest x, seed_str32 }),
75
+ c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.digest x, seed_str32 }),
76
+ c.new("MurmurHash64A", proc{|x| Digest::MurmurHash64A.digest x, seed_str64 }),
77
+ c.new("MurmurHash64B", proc{|x| Digest::MurmurHash64B.digest x, seed_str64 }),
78
+ c.new("MurmurHashNeutral2", proc{|x| Digest::MurmurHashNeutral2.digest x, seed_str32 }),
79
+ c.new("MurmurHashAligned2", proc{|x| Digest::MurmurHashAligned2.digest x, seed_str32 }),
80
+ c.new("MurmurHash3_x86_32", proc{|x| Digest::MurmurHash3_x86_32.digest x, seed_str32 }),
81
+ c.new("MurmurHash3_x86_128", proc{|x| Digest::MurmurHash3_x86_128.digest x, seed_str32 }),
82
+ c.new("MurmurHash3_x64_128", proc{|x| Digest::MurmurHash3_x64_128.digest x, seed_str32 }),
82
83
  ]
83
84
  reals = {}
84
85
  confrict = {}
@@ -94,20 +95,25 @@ puts
94
95
  puts "```"
95
96
  Benchmark.bm do |x|
96
97
  cases.each do |c|
98
+ i = 0
97
99
  z = x.report c.name do
98
- times_enum.each do |i|
100
+ while i < n
99
101
  c.func.call(a[i])
102
+ i += 1
100
103
  end
101
104
  end
102
105
 
103
106
  confrict.clear
104
- times_enum.each do |i|
107
+
108
+ i = 0
109
+ while i < n
105
110
  rethash = c.func.call(a[i])
106
111
  if confrict[rethash].nil?
107
112
  confrict[rethash] = 0
108
- else
113
+ else
109
114
  confrict[rethash] += 1
110
115
  end
116
+ i += 1
111
117
  end
112
118
  reals[c.name] = z.real
113
119
  confricts[c.name] = confrict.count{|hash, count| 0 < count}
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Digest::MurmurHash do
3
+ describe "Digest::MurmurHash" do
4
4
  it "seed" do
5
5
  all_classes.each do |c|
6
6
  m = c.new
@@ -13,7 +13,7 @@ describe Digest::MurmurHash do
13
13
  m = c.new
14
14
  m.update "murmurhash"
15
15
  before_digest = m.hexdigest
16
- m.seed = (c::DEFAULT_SEED.length == 4) ? seed32 : seed64
16
+ m.seed = seed_str(c)
17
17
  expect(c::DEFAULT_SEED).not_to eq(m.seed)
18
18
  expect(before_digest).not_to eq(m.hexdigest)
19
19
  end
@@ -25,45 +25,59 @@ describe Digest::MurmurHash do
25
25
  end
26
26
  end
27
27
 
28
- it "digest and hexdigest" do
29
- expect(Digest::MurmurHash1.hexdigest("abc")).to eq("dc5f5755")
28
+ context "static hexdigest" do
29
+ it { expect(Digest::MurmurHash1.hexdigest("abc")).to eq("dc5f5755") }
30
+ it { expect(Digest::MurmurHash2.hexdigest("abc")).to eq("9b7c5713") }
31
+ it { expect(Digest::MurmurHash2A.hexdigest("abc")).to eq("679f5811") }
32
+ it { expect(Digest::MurmurHash64A.hexdigest("abc")).to eq("fb5ea99834c3c99c") }
33
+ it { expect(Digest::MurmurHash64B.hexdigest("abc")).to eq("9d595cce51420da6") }
34
+ it { expect(Digest::MurmurHashAligned2.hexdigest("abc")).to eq("9b7c5713") }
35
+ it { expect(Digest::MurmurHashNeutral2.hexdigest("abc")).to eq("9b7c5713") }
36
+ it { expect(Digest::MurmurHash3_x86_32.hexdigest("abc")).to eq("fa93ddb3") }
37
+ it { expect(Digest::MurmurHash3_x86_128.hexdigest("abc")).to eq("a506b0a2a506b0a2a506b0a2d1c6cd75") }
38
+ it { expect(Digest::MurmurHash3_x64_128.hexdigest("abc")).to eq("4174a23b522dca263f3f96b46778ad3f") }
39
+ end
40
+
41
+ context "digest and hexdigest" do
30
42
  all_classes.each do |c|
31
43
  [:digest, :hexdigest].each do |method|
32
44
  str = "a" * 1024
33
45
  d = c.send(method, str)
34
- d2 = c.send(method, str, (c::DEFAULT_SEED.length == 4) ? seed32 : seed64)
35
- expect(d).to be_a_kind_of(String)
36
- expect(d2).to be_a_kind_of(String)
37
- expect(d.length).to be > 0
38
- expect(d2.length).to be > 0
39
- expect(d != d2).to be_truthy
46
+ d2 = c.send(method, str, seed_str(c))
47
+ it ("#{c}##{method}") do
48
+ expect(d).to be_a_kind_of(String)
49
+ expect(d2).to be_a_kind_of(String)
50
+ expect(d.length).to be > 0
51
+ expect(d2.length).to be > 0
52
+ expect(d != d2).to be_truthy
53
+ end
40
54
  end
41
55
  end
42
56
  end
43
57
 
44
- it "rawdigest" do
58
+ describe "rawdigest" do
45
59
  all_classes.each do |c|
46
60
  str = "a" * 1024
61
+ seed = seed_str(c)
47
62
  d = c.rawdigest str
48
- d2 = c.rawdigest str, (c::DEFAULT_SEED.length == 4) ? seed32 : seed64
49
- expect(d).to be_a_kind_of(Integer)
50
- expect(d2).to be_a_kind_of(Integer)
51
- expect(d).to be > 0
52
- expect(d2).to be > 0
53
- expect(d != d2).to be_truthy
63
+ d2 = c.rawdigest str, seed
64
+ it(c) do
65
+ expect(d != d2).to be_truthy
66
+ end
54
67
  end
55
68
  end
56
69
 
57
- it "update and reset and hexdigest(32bit)" do
70
+ describe "update and reset and hexdigest" do
58
71
  all_classes.each do |c|
59
72
  murmur = c.new
60
73
  murmur.update("m").update("u").update("r")
61
74
  murmur << "m" << "u" << "r"
62
75
  murmur << "hash"
63
76
  hex = murmur.hexdigest
64
- expect(murmur.hexdigest! == hex).to be true
65
- reset_str = "0" * ((c::DEFAULT_SEED.length == 4) ? 8 : 16)
66
- expect(murmur.hexdigest).to eq(reset_str)
77
+ it(c) do
78
+ expect(murmur.hexdigest! == hex).to be true
79
+ expect(murmur.hexdigest).to eq("0" * (murmur.digest_length*2))
80
+ end
67
81
  end
68
82
  end
69
83
 
@@ -89,15 +103,13 @@ describe Digest::MurmurHash do
89
103
  end
90
104
  end
91
105
 
92
- it "length" do
93
- all_classes.each do |c|
94
- expect(c.new.length == c::DEFAULT_SEED.length).to be_truthy
95
- end
96
- end
97
-
98
- it "to_i" do
106
+ it "reset" do
99
107
  all_classes.each do |c|
100
- expect(c.new.update("murmurhash").to_i).to be_a_kind_of(Integer)
108
+ m1 = c.new
109
+ m2 = c.new
110
+ m1.update("murmurhash")
111
+ m1.seed = seed_str(c)
112
+ expect(m1.reset).to eq(m2)
101
113
  end
102
114
  end
103
115
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe MurmurHash do
3
+ describe "Digest::MurmurHash" do
4
4
  it "update nil" do
5
5
  all_classes.each do |c|
6
6
  murmur = c.new
@@ -14,22 +14,27 @@ describe MurmurHash do
14
14
  end
15
15
  end
16
16
 
17
- it "seed digest" do
17
+ describe "seed digest" do
18
18
  all_classes.each do |c|
19
- expect{ c.digest "", ""}.to raise_error(ArgumentError)
20
- expect{ c.digest "", "\x00" }.to raise_error(ArgumentError)
21
- seed = (c::DEFAULT_SEED.bytesize == 4) ? seed32 : seed64
22
- expect{ c.digest "", seed }.to_not raise_error
19
+ it (c) do
20
+ expect{ c.digest "", ""}.to raise_error(ArgumentError)
21
+ expect{ c.digest "", "\x00" * (c::DEFAULT_SEED.length )}.to_not raise_error
22
+ expect{ c.digest "", "\x00" * (c::DEFAULT_SEED.length+1)}.to raise_error(ArgumentError)
23
+ expect{ c.digest "", seed_str(c) }.to_not raise_error
24
+ end
23
25
  end
24
26
  end
25
27
 
26
- it "seed instance" do
28
+ describe "seed instance" do
27
29
  all_classes.each do |c|
28
- murmur = c.new
29
- expect{ murmur.seed = "" }.to raise_error(ArgumentError)
30
- expect{ murmur.seed = "\x00" }.to raise_error(ArgumentError)
31
- seed = (c::DEFAULT_SEED.bytesize == 4) ? seed32 : seed64
32
- expect{ murmur.seed = seed }.to_not raise_error
30
+ it (c) do
31
+ murmur = c.new
32
+ expect{ murmur.seed = "" }.to raise_error(ArgumentError)
33
+ expect{ murmur.seed = "\x00" * (c::DEFAULT_SEED.length ) }.to_not raise_error
34
+ expect{ murmur.seed = "\x00" * (c::DEFAULT_SEED.length+1) }.to raise_error(ArgumentError)
35
+ seed = seed_str(c)
36
+ expect{ murmur.seed = seed }.to_not raise_error
37
+ end
33
38
  end
34
39
  end
35
40
  end