digest-murmurhash 0.1.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f158d21ca468ee7cccb9c39ba6fc57660850edd
4
- data.tar.gz: 82730d511f8136b2ff960c0d2d99a62c9a78c339
3
+ metadata.gz: 635a6c34802609c512789d68c9ebe571df48cc01
4
+ data.tar.gz: 1a1884fb59d6a9ccfca3c5da969e688589c093f0
5
5
  SHA512:
6
- metadata.gz: 4494bc8ee54eb5bc525c9c8d5a8f275c299b73350fd0f6fcb638e7272fb546cab8d5bd5867cab5f388845ddc2eeec11302c8d982ea1e0e5bb33cc2b50b127d28
7
- data.tar.gz: 20cc99086cc26e5ba616a9d179aa11121f88704a8effb6a417dd3cc58032b8c0fb89a21818c1bf790bfd7dde3b94a0477cd67d74d0fe860c882e339ddfd3c1b8
6
+ metadata.gz: 2633494b6b099a2149deeb3c39af1eb81e80435156bc459b4aee9498ca5209f41ac5a400713b6ae6c762935c5a1d964bc408e63a6e21e0634778d4a78c63f329
7
+ data.tar.gz: 9c61561046045532d0c8fe863059625cb532d051ea978f16fc63b0bd7fada9fded44900d91642695253d50fd94344c9444d35d5e6715b2956c2078713bcd9fd2
data/README.md CHANGED
@@ -2,26 +2,39 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/ksss/digest-murmurhash.png?branch=master)](https://travis-ci.org/ksss/digest-murmurhash)
4
4
 
5
- Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit) desiged by Austin Appleby.
5
+ **MurmurHash** is an algorithm designed by Austin Appleby.
6
6
 
7
- Digest::MurmurHash compliance Digest API of Ruby.
7
+ **Digest::MurmurHash1** uses the MurmurHash1 (32-bit) algorithm.
8
8
 
9
- ## Usage
9
+ **Digest::MurmurHash2** uses the MurmurHash2 (32-bit) algorithm.
10
+
11
+ (**Digest::MurmurHash** exists only to hold the version number.)
10
12
 
11
- All interface compliance Digest::Base.
13
+ All classes comply with Ruby's Digest API.
14
+
15
+ ## Usage
12
16
 
13
- You can use same interface built in Digest::XXX classes.
17
+ You can use the same interface as the built-in Digest::XXX classes.
14
18
 
15
19
  ```ruby
16
20
  require 'digest/murmurhash'
17
21
 
18
- p Digest::MurmurHash.hexdigest('murmurhash') #=> 'c709abd5'
19
- p Digest::MurmurHash.file("./LICENSE.txt").hexdigest #=> '712e9641'
22
+ # MurmurHash1 can be used in the same way as Digest::XXX.
23
+
24
+ p Digest::MurmurHash1.hexdigest('murmurhash') #=> 'c709abd5'
25
+ p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> '712e9641'
26
+
27
+ # MurmurHash2 works the same way, but returns a different value because it uses a different algorithm.
28
+
29
+ p Digest::MurmurHash2.hexdigest('murmurhash') #=> '33f67c7e'
30
+ p Digest::MurmurHash2.file("./LICENSE.txt").hexdigest #=> '78678326'
20
31
  ```
21
32
 
22
33
  ## Class tree
23
34
 
24
- Digest::MurmurHash < Digest::Base < Digest::Instance < Digest::Class < Object < Kernel < BasicObject
35
+ **Digest::MurmurHash1** < Digest::StringBuffer
36
+
37
+ **Digest::MurmurHash2** < Digest::StringBuffer
25
38
 
26
39
  ## Installation
27
40
 
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ require 'rake/extensiontask'
14
14
  spec = Bundler::GemHelper.gemspec
15
15
  Rake::ExtensionTask.new('murmurhash', spec) do |ext|
16
16
  ext.ext_dir = 'ext/digest/murmurhash'
17
- ext.lib_dir = 'lib/digest'
17
+ ext.lib_dir = 'lib/digest/murmurhash'
18
18
  end
19
19
 
20
20
 
@@ -1,16 +1,15 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- # require 'digest/murmurhash/version'
4
+ require 'digest/murmurhash/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "digest-murmurhash"
8
- # FIXME replace Digest::MurmurHash::VERSION but raise NameError
9
- spec.version = "0.1.0"
8
+ spec.version = Digest::MurmurHash::VERSION
10
9
  spec.author = "ksss"
11
10
  spec.email = "co000ri@gmail.com"
12
- spec.description = %q{Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit) desiged by Austin Appleby.}
13
- spec.summary = %q{Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit) desiged by Austin Appleby.}
11
+ spec.description = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
12
+ spec.summary = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
14
13
  spec.homepage = ""
15
14
  spec.license = "MIT"
16
15
 
@@ -23,5 +22,6 @@ Gem::Specification.new do |spec|
23
22
  spec.add_development_dependency "bundler", "~> 1.3"
24
23
  spec.add_development_dependency "rake"
25
24
  spec.add_development_dependency "rspec", ['~> 2.11']
26
- spec.add_development_dependency "rake-compiler", ["~> 0.8.3"]
25
+ spec.add_development_dependency "rake-compiler", ["~> 0.9.2"]
26
+ spec.add_development_dependency "digest-stringbuffer", ["~> 0.0.2"]
27
27
  end
@@ -4,4 +4,4 @@ have_header('ruby/digest.h')
4
4
 
5
5
  $preload = %w[digest]
6
6
 
7
- create_makefile('digest/murmurhash')
7
+ create_makefile('digest/murmurhash/murmurhash')
@@ -0,0 +1,34 @@
1
+ #include "murmurhash.h"
2
+ #include "murmurhash1.h"
3
+ #include "murmurhash2.h"
4
+ #include "murmurhash2a.h"
5
+
6
+ void
7
+ Init_murmurhash(void)
8
+ {
9
+ VALUE mDigest, cDigest_StringBuffer;
10
+ VALUE cDigest_MurmurHash1, cDigest_MurmurHash2, cDigest_MurmurHash2A;
11
+
12
+ /* Digest::MurmurHash is require that Digest::StringBuffer */
13
+ rb_require("digest/stringbuffer");
14
+ mDigest = rb_path2class("Digest");
15
+ cDigest_StringBuffer = rb_path2class("Digest::StringBuffer");
16
+
17
+ /* class Digest::MurmurHash1 < Digest::StringBuffer */
18
+ cDigest_MurmurHash1 = rb_define_class_under(mDigest, "MurmurHash1", cDigest_StringBuffer);
19
+ rb_define_private_method(cDigest_MurmurHash1, "finish", murmur1_finish, 0);
20
+ rb_define_method(cDigest_MurmurHash1, "to_i", murmur1_to_i, 0);
21
+ rb_define_singleton_method(cDigest_MurmurHash1, "rawdigest", murmur1_s_rawdigest, -1);
22
+
23
+ /* class Digest::MurmurHash2 < Digest::StringBuffer */
24
+ cDigest_MurmurHash2 = rb_define_class_under(mDigest, "MurmurHash2", cDigest_StringBuffer);
25
+ rb_define_private_method(cDigest_MurmurHash2, "finish", murmur2_finish, 0);
26
+ rb_define_method(cDigest_MurmurHash2, "to_i", murmur2_to_i, 0);
27
+ rb_define_singleton_method(cDigest_MurmurHash2, "rawdigest", murmur2_s_rawdigest, -1);
28
+
29
+ /* class Digest::MurmurHash2A < Digest::StringBuffer */
30
+ cDigest_MurmurHash2A = rb_define_class_under(mDigest, "MurmurHash2A", cDigest_StringBuffer);
31
+ rb_define_private_method(cDigest_MurmurHash2A, "finish", murmur2a_finish, 0);
32
+ rb_define_method(cDigest_MurmurHash2A, "to_i", murmur2a_to_i, 0);
33
+ rb_define_singleton_method(cDigest_MurmurHash2A, "rawdigest", murmur2a_s_rawdigest, -1);
34
+ }
@@ -0,0 +1,23 @@
1
+ #ifndef MURMURHASH_INCLUDED
2
+ # define MURMURHASH_INCLUDED
3
+
4
+ #include "ruby.h"
5
+
6
+ #define MURMURHASH_MAGIC 0x5bd1e995
7
+
8
+ /* should be same type structure to digest/stringbuffer */
9
+ typedef struct {
10
+ char* buffer;
11
+ char* p;
12
+ size_t memsize;
13
+ } buffer_t;
14
+
15
+ #define MURMURHASH(self, name) \
16
+ buffer_t* name; \
17
+ Data_Get_Struct(self, buffer_t, name); \
18
+ if (name == NULL) { \
19
+ rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
20
+ }
21
+
22
+ #endif /* ifndef MURMURHASH_INCLUDED */
23
+
@@ -0,0 +1,86 @@
1
+ /*
2
+ * MurmurHash1 (C) Austin Appleby
3
+ */
4
+
5
+ #include "murmurhash1.h"
6
+
7
/* Normally supplied by murmurhash.h; the fallback keeps this unit self-contained. */
#ifndef MURMURHASH_MAGIC
# define MURMURHASH_MAGIC 0x5bd1e995
#endif

/*
 * Read four bytes as a native-endian 32-bit word.
 * Copying byte by byte keeps the byte order the previous pointer cast
 * produced, but is valid for any alignment and does not break aliasing rules.
 */
static inline uint32_t
murmur1_load32(const unsigned char *p)
{
  union {
    uint32_t word;
    unsigned char bytes[4];
  } u;

  u.bytes[0] = p[0];
  u.bytes[1] = p[1];
  u.bytes[2] = p[2];
  u.bytes[3] = p[3];
  return u.word;
}

/*
 * One MurmurHash1 mixing step: multiply by the magic constant, then fold the
 * high bits down by `r`. Returns the new 32-bit state (the old declaration
 * returned size_t, which only widened the value needlessly).
 */
static inline uint32_t
murmur1(uint32_t h, const uint8_t r)
{
  const uint32_t m = MURMURHASH_MAGIC;
  h *= m;
  h ^= h >> r;
  return h;
}

/*
 * Compute the 32-bit MurmurHash1 of `length` bytes starting at `data`.
 *
 * data   - input bytes; may hold arbitrary binary data.
 * length - number of bytes to hash.
 *
 * Bytes are treated as unsigned. The previous code read the 1-3 trailing
 * bytes through plain `char`, so on platforms where char is signed a byte
 * >= 0x80 was sign-extended (and a negative value was left-shifted, which is
 * undefined). Results for inputs without such trailing bytes are unchanged.
 */
static uint32_t
murmur_hash_process1(const char *data, uint32_t length)
{
  const uint32_t m = MURMURHASH_MAGIC;
  const uint8_t r = 16;
  const unsigned char *p = (const unsigned char *) data;
  uint32_t h = length * m;

  /* Body: consume the input one 32-bit word at a time. */
  while (4 <= length) {
    h += murmur1_load32(p);
    h = murmur1(h, r);
    p += 4;
    length -= 4;
  }

  /* Tail: fold in the remaining 1-3 bytes, if any. */
  switch (length) {
  case 3:
    h += (uint32_t) p[2] << 16;
    /* fallthrough */
  case 2:
    h += (uint32_t) p[1] << 8;
    /* fallthrough */
  case 1:
    h += p[0];
    h = murmur1(h, r);
    break;
  default:
    break;
  }

  /* Final avalanche. */
  h = murmur1(h, 10);
  h = murmur1(h, 17);

  return h;
}
47
+
48
+ VALUE
49
+ murmur1_finish(VALUE self)
50
+ {
51
+ uint32_t h;
52
+ uint8_t digest[4];
53
+ MURMURHASH(self, ptr);
54
+
55
+ h = murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer);
56
+
57
+ digest[0] = h >> 24;
58
+ digest[1] = h >> 16;
59
+ digest[2] = h >> 8;
60
+ digest[3] = h;
61
+
62
+ return rb_str_new((const char*) digest, 4);
63
+ }
64
+
65
+ VALUE
66
+ murmur1_to_i(VALUE self)
67
+ {
68
+ MURMURHASH(self, ptr);
69
+ return UINT2NUM(murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer));
70
+ }
71
+
72
+ VALUE
73
+ murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass)
74
+ {
75
+ VALUE str;
76
+
77
+ if (argc < 1)
78
+ rb_raise(rb_eArgError, "no data given");
79
+
80
+ str = *argv++;
81
+ argc--;
82
+
83
+ StringValue(str);
84
+
85
+ return UINT2NUM(murmur_hash_process1(RSTRING_PTR(str), RSTRING_LEN(str)));
86
+ }
@@ -0,0 +1,11 @@
1
/* Entry points of the MurmurHash1 implementation, registered from Init_murmurhash. */
#ifndef MURMURHASH1_INCLUDED
# define MURMURHASH1_INCLUDED

#include "murmurhash.h"

/* `finish`: hash of the accumulated bytes as a 4-byte string, most-significant byte first. */
VALUE murmur1_finish(VALUE self);
/* `to_i`: hash of the accumulated bytes as a Ruby Integer. */
VALUE murmur1_to_i(VALUE self);
/* `rawdigest`: hash of argv[0] as a Ruby Integer; raises ArgumentError when argc < 1. */
VALUE murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass);

#endif /* ifndef MURMURHASH1_INCLUDED */
11
+
@@ -0,0 +1,90 @@
1
+ /*
2
+ * MurmurHash2 (C) Austin Appleby
3
+ */
4
+
5
+ #include "murmurhash2.h"
6
+
7
/* Normally supplied by murmurhash.h; the fallback keeps this unit self-contained. */
#ifndef MURMURHASH_MAGIC
# define MURMURHASH_MAGIC 0x5bd1e995
#endif

/*
 * Read four bytes as a native-endian 32-bit word.
 * Copying byte by byte keeps the byte order the previous pointer cast
 * produced, but is valid for any alignment and does not break aliasing rules.
 */
static inline uint32_t
murmur2_load32(const unsigned char *p)
{
  union {
    uint32_t word;
    unsigned char bytes[4];
  } u;

  u.bytes[0] = p[0];
  u.bytes[1] = p[1];
  u.bytes[2] = p[2];
  u.bytes[3] = p[3];
  return u.word;
}

/*
 * One MurmurHash2 mixing step: scramble the input word `k`, then merge it
 * into the state `h`. Returns the new 32-bit state (the old declaration
 * returned size_t, which only widened the value needlessly).
 */
static inline uint32_t
murmur2(uint32_t h, uint32_t k, const uint8_t r)
{
  const uint32_t m = MURMURHASH_MAGIC;

  k *= m;
  k ^= k >> r;
  k *= m;

  h *= m;
  h ^= k;
  return h;
}

/*
 * Compute the 32-bit hash Digest::MurmurHash2 exposes for `length` bytes at
 * `data`.
 *
 * data   - input bytes; may hold arbitrary binary data.
 * length - number of bytes to hash.
 *
 * Bytes are treated as unsigned. The previous code read the 1-3 trailing
 * bytes through plain `char`, so on platforms where char is signed a byte
 * >= 0x80 was sign-extended (and a negative value was left-shifted, which is
 * undefined). Results for inputs without such trailing bytes are unchanged.
 *
 * NOTE(review): the published reference MurmurHash2 starts from seed ^ len,
 * while this port starts from len * m. Left untouched because existing
 * digests depend on it -- confirm whether the difference is intentional.
 */
static uint32_t
murmur_hash_process2(const char *data, uint32_t length)
{
  const uint32_t m = MURMURHASH_MAGIC;
  const uint8_t r = 24;
  const unsigned char *p = (const unsigned char *) data;
  uint32_t h = length * m;

  /* Body: consume the input one 32-bit word at a time. */
  while (4 <= length) {
    h = murmur2(h, murmur2_load32(p), r);
    p += 4;
    length -= 4;
  }

  /* Tail: fold in the remaining 1-3 bytes, if any. */
  switch (length) {
  case 3:
    h ^= (uint32_t) p[2] << 16;
    /* fallthrough */
  case 2:
    h ^= (uint32_t) p[1] << 8;
    /* fallthrough */
  case 1:
    h ^= p[0];
    h *= m;
    break;
  default:
    break;
  }

  /* Final avalanche. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
49
+
50
+ VALUE
51
+ murmur2_finish(VALUE self)
52
+ {
53
+ uint32_t h;
54
+ uint8_t digest[4];
55
+ MURMURHASH(self, ptr);
56
+
57
+ h = murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer);
58
+
59
+ digest[0] = h >> 24;
60
+ digest[1] = h >> 16;
61
+ digest[2] = h >> 8;
62
+ digest[3] = h;
63
+
64
+ return rb_str_new((const char*) digest, 4);
65
+ }
66
+
67
+ VALUE
68
+ murmur2_to_i(VALUE self)
69
+ {
70
+ MURMURHASH(self, ptr);
71
+ return UINT2NUM(murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer));
72
+ }
73
+
74
+ VALUE
75
+ murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
76
+ {
77
+ VALUE str;
78
+ volatile VALUE obj;
79
+
80
+ if (argc < 1)
81
+ rb_raise(rb_eArgError, "no data given");
82
+
83
+ str = *argv++;
84
+ argc--;
85
+
86
+ StringValue(str);
87
+
88
+ return UINT2NUM(murmur_hash_process2(RSTRING_PTR(str), RSTRING_LEN(str)));
89
+ }
90
+
@@ -0,0 +1,11 @@
1
/* Entry points of the MurmurHash2 implementation, registered from Init_murmurhash. */
#ifndef MURMURHASH2_INCLUDED
# define MURMURHASH2_INCLUDED

#include "murmurhash.h"

/* `finish`: hash of the accumulated bytes as a 4-byte string, most-significant byte first. */
VALUE murmur2_finish(VALUE self);
/* `to_i`: hash of the accumulated bytes as a Ruby Integer. */
VALUE murmur2_to_i(VALUE self);
/* `rawdigest`: hash of argv[0] as a Ruby Integer; raises ArgumentError when argc < 1. */
VALUE murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass);

#endif /* ifndef MURMURHASH2_INCLUDED */
11
+
@@ -0,0 +1,84 @@
1
+ /*
2
+ * MurmurHash2A (C) Austin Appleby
3
+ */
4
+
5
+ #include "murmurhash2a.h"
6
+
7
/* Normally supplied by murmurhash.h; the fallback keeps this unit self-contained. */
#ifndef MURMURHASH_MAGIC
# define MURMURHASH_MAGIC 0x5bd1e995
#endif

/*
 * Read four bytes as a native-endian 32-bit word.
 * Copying byte by byte keeps the byte order the previous pointer cast
 * produced, but is valid for any alignment, keeps the const qualifier and
 * does not break aliasing rules.
 */
static inline uint32_t
murmur2a_load32(const unsigned char *p)
{
  union {
    uint32_t word;
    unsigned char bytes[4];
  } u;

  u.bytes[0] = p[0];
  u.bytes[1] = p[1];
  u.bytes[2] = p[2];
  u.bytes[3] = p[3];
  return u.word;
}

/*
 * One MurmurHash2A mixing step: scramble the input word `k`, then merge it
 * into the state `h` and return the new state. Replaces the former `mmix`
 * macro, which silently captured `m` and `r` from the caller's scope and
 * modified both of its arguments.
 */
static inline uint32_t
murmur2a_mix(uint32_t h, uint32_t k)
{
  const uint32_t m = MURMURHASH_MAGIC;
  const uint8_t r = 24;

  k *= m;
  k ^= k >> r;
  k *= m;

  h *= m;
  h ^= k;
  return h;
}

/*
 * Compute the 32-bit hash Digest::MurmurHash2A exposes for `length` bytes at
 * `key`.
 *
 * key    - input bytes; may hold arbitrary binary data.
 * length - number of bytes to hash.
 *
 * NOTE(review): the published reference MurmurHash2A starts from the seed
 * alone, while this port starts from the length. Left untouched because
 * existing digests depend on it -- confirm whether the difference is
 * intentional.
 */
static uint32_t
murmur_hash_process2a(const void *key, uint32_t length)
{
  const uint32_t m = MURMURHASH_MAGIC;
  const unsigned char *data = (const unsigned char *) key;
  const uint32_t total = length; /* original length, mixed in at the end */
  uint32_t h = 0 ^ length;
  uint32_t tail = 0;

  /* Body: consume the input one 32-bit word at a time. */
  while (4 <= length) {
    h = murmur2a_mix(h, murmur2a_load32(data));
    data += 4;
    length -= 4;
  }

  /* Tail: gather the remaining 1-3 bytes into one word (0 when none). */
  switch (length) {
  case 3:
    tail ^= (uint32_t) data[2] << 16;
    /* fallthrough */
  case 2:
    tail ^= (uint32_t) data[1] << 8;
    /* fallthrough */
  case 1:
    tail ^= data[0];
    break;
  default:
    break;
  }

  /* The tail word and the total length are always mixed in. */
  h = murmur2a_mix(h, tail);
  h = murmur2a_mix(h, total);

  /* Final avalanche. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
43
+
44
+ VALUE
45
+ murmur2a_finish(VALUE self)
46
+ {
47
+ uint32_t h;
48
+ uint8_t digest[4];
49
+ MURMURHASH(self, ptr);
50
+
51
+ h = murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer);
52
+
53
+ digest[0] = h >> 24;
54
+ digest[1] = h >> 16;
55
+ digest[2] = h >> 8;
56
+ digest[3] = h;
57
+
58
+ return rb_str_new((const char*) digest, 4);
59
+ }
60
+
61
+ VALUE
62
+ murmur2a_to_i(VALUE self)
63
+ {
64
+ MURMURHASH(self, ptr);
65
+ return UINT2NUM(murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer));
66
+ }
67
+
68
+ VALUE
69
+ murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
70
+ {
71
+ VALUE str;
72
+ volatile VALUE obj;
73
+
74
+ if (argc < 1)
75
+ rb_raise(rb_eArgError, "no data given");
76
+
77
+ str = *argv++;
78
+ argc--;
79
+
80
+ StringValue(str);
81
+
82
+ return UINT2NUM(murmur_hash_process2a(RSTRING_PTR(str), RSTRING_LEN(str)));
83
+ }
84
+
@@ -0,0 +1,11 @@
1
/* Entry points of the MurmurHash2A implementation, registered from Init_murmurhash. */
#ifndef MURMURHASH2A_INCLUDED
# define MURMURHASH2A_INCLUDED

#include "murmurhash.h"

/* `finish`: hash of the accumulated bytes as a 4-byte string, most-significant byte first. */
VALUE murmur2a_finish(VALUE self);
/* `to_i`: hash of the accumulated bytes as a Ruby Integer. */
VALUE murmur2a_to_i(VALUE self);
/* `rawdigest`: hash of argv[0] as a Ruby Integer; raises ArgumentError when argc < 1. */
VALUE murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass);

#endif /* ifndef MURMURHASH2A_INCLUDED */
11
+
@@ -0,0 +1,6 @@
1
# Load the compiled extension, then the version constant.
#
# A build stored under a directory named after the running Ruby's
# "major.minor" version is tried first (presumably the layout of precompiled
# gems -- confirm); otherwise the extension built at install time is loaded.
begin
  # The dot is escaped so that only a literal "." separates the two numbers.
  require "digest/murmurhash/#{RUBY_VERSION[/\d+\.\d+/]}/murmurhash"
rescue LoadError
  require "digest/murmurhash/murmurhash"
end
require "digest/murmurhash/version"
@@ -1,5 +1,5 @@
1
1
  module Digest
2
2
  class MurmurHash
3
- VERSION = "0.1.0"
3
+ VERSION = "0.2.1"
4
4
  end
5
5
  end
data/spec/bench.rb CHANGED
@@ -3,9 +3,24 @@
3
3
  lib = File.expand_path('../../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
+ require 'digest/stringbuffer'
6
7
  require 'digest/murmurhash'
7
8
  require 'benchmark'
8
9
 
10
+ class Prime37 < Digest::StringBuffer
11
+ def initialize
12
+ @prime = 37
13
+ end
14
+
15
+ def finish
16
+ result = 0
17
+ buffer.unpack("C*").each do |c|
18
+ result += (c * @prime)
19
+ end
20
+ [result & 0xffffffff].pack("N")
21
+ end
22
+ end
23
+
9
24
  class Integer
10
25
  def to_32
11
26
  self & 0xffffffff
@@ -44,30 +59,82 @@ def murmur_hash str
44
59
  h = (h * m).to_32
45
60
  h ^= h >> 17
46
61
 
47
- h.to_32
62
+ h
48
63
  end
49
64
 
65
+ @rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
50
66
  def rand_str
51
67
  rand = "";
52
- File.open("/dev/urandom").read(20).each_byte{|x| rand << sprintf("%02x",x)}
68
+ 20.times {
69
+ rand << @rands[rand(62)]
70
+ }
53
71
  rand
54
72
  end
55
73
 
56
- s = rand_str
57
- p [murmur_hash(s)].pack("N")
58
- p Digest::MurmurHash.digest(s)
74
+ n = 100000
75
+ times_enum = n.times
76
+
77
+ a = Array.new(n, 0)
78
+ n.times do |i|
79
+ a[i] = rand_str
80
+ end
81
+
82
+ c = Struct.new "Cases",
83
+ :name,
84
+ :func
85
+ cases = [
86
+ c.new("pureRuby", proc{|x| murmur_hash x }),
87
+ c.new("Prime37", proc{|x| Prime37.digest x }),
88
+ c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x }),
89
+ c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x }),
90
+ c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x }),
91
+ ]
92
+
93
+ reals = {}
94
+ confrict = {}
95
+ confricts = {}
59
96
 
97
+ puts "### condition"
98
+ puts
99
+ puts " RUBY_VERSION = #{RUBY_VERSION}"
100
+ puts " count = #{n}"
101
+ puts
102
+ puts "### benchmark"
103
+ puts
104
+ puts "```"
60
105
  Benchmark.bm do |x|
61
- n = 10000
62
- a = []
63
- n.times { |i|
64
- a[i] = rand_str
65
- }
106
+ cases.each do |c|
107
+ z = x.report c.name do
108
+ times_enum.each do |i|
109
+ c.func.call(a[i])
110
+ end
111
+ end
112
+
113
+ confrict.clear
114
+ times_enum.each do |i|
115
+ rethash = c.func.call(a[i])
116
+ if confrict[rethash].nil?
117
+ confrict[rethash] = 0
118
+ else
119
+ confrict[rethash] += 1
120
+ end
121
+ end
122
+ reals[c.name] = z.real
123
+ confricts[c.name] = confrict.count{|hash, count| 0 < count}
124
+ end
125
+ end
126
+ puts "```"
127
+
128
+ puts
129
+ puts "### real second rate (pureRuby/)"
130
+ puts
131
+ reals.each do |name, real|
132
+ puts " " + (reals["pureRuby"] / real).to_s + "/" + name
133
+ end
66
134
 
67
- x.report {n.times{ |i|
68
- [murmur_hash(a[i])].pack("N")
69
- }}
70
- x.report {n.times{ |i|
71
- Digest::MurmurHash.digest(a[i])
72
- }}
135
+ puts
136
+ puts "### confrict count (/#{n})"
137
+ puts
138
+ confricts.each do |name, count|
139
+ puts " #{name}: #{count}"
73
140
  end
data/spec/digest_spec.rb CHANGED
@@ -1,60 +1,82 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Digest::MurmurHash do
4
- let :murmur do
5
- Digest::MurmurHash.new
3
+ describe "Digest::MurmurHash1 and 2" do
4
+ let :all do
5
+ [MurmurHash1, MurmurHash2, MurmurHash2A]
6
6
  end
7
7
 
8
8
  it "initialize" do
9
- expect(Digest::MurmurHash.new).to be_a_kind_of(Digest::Base)
9
+ expect(MurmurHash1.new).to be_a_kind_of(Digest::StringBuffer)
10
+ expect(MurmurHash2.new).to be_a_kind_of(Digest::StringBuffer)
11
+ expect(MurmurHash2A.new).to be_a_kind_of(Digest::StringBuffer)
12
+ end
13
+
14
+ it "digest" do
15
+ expect(MurmurHash1.digest("a" * 1024)).to eq("\xa1\x52\x2e\x5f".force_encoding("ASCII-8BIT"))
16
+ expect(MurmurHash2.digest("a" * 1024)).to eq("\xd0\x0c\x31\x2f".force_encoding("ASCII-8BIT"))
17
+ expect(MurmurHash2A.digest("a" * 1024)).to eq("\xd5\x2d\xb1\x67".force_encoding("ASCII-8BIT"))
10
18
  end
11
19
 
12
20
  it "hexdigest" do
13
- expect(Digest::MurmurHash.hexdigest("a" * 1024)).to eq("a1522e5f")
21
+ expect(MurmurHash1.hexdigest("a" * 1024)).to eq("a1522e5f")
22
+ expect(MurmurHash2.hexdigest("a" * 1024)).to eq("d00c312f")
23
+ expect(MurmurHash2A.hexdigest("a" * 1024)).to eq("d52db167")
14
24
  end
15
25
 
16
- it "update and reset and hexdigest" do
17
- murmur.update("m")
18
- murmur.update("u")
19
- murmur.update("r")
20
- murmur.update("m")
21
- murmur.update("u")
22
- murmur.update("r")
23
- murmur.update("hash")
24
- expect(murmur.hexdigest).to eq("c709abd5");
25
- expect(murmur.hexdigest).to eq("c709abd5");
26
- expect(murmur.hexdigest!).to eq("c709abd5");
27
- expect(murmur.hexdigest).to eq("00000000");
26
+ it "rawdigest" do
27
+ expect(MurmurHash1.rawdigest("a" * 1024)).to eq(0xa1522e5f)
28
+ expect(MurmurHash2.rawdigest("a" * 1024)).to eq(0xd00c312f)
29
+ expect(MurmurHash2A.rawdigest("a" * 1024)).to eq(0xd52db167)
28
30
  end
29
31
 
30
- it "dup" do
31
- murmur1 = Digest::MurmurHash.new
32
- murmur2 = Digest::MurmurHash.new
33
- murmur1.update("murmur")
34
- murmur2 = murmur1.dup
35
- murmur2.update("hash")
36
- expect(murmur2.hexdigest).to eq("c709abd5")
32
+ it "update and reset and hexdigest" do
33
+ {
34
+ MurmurHash1 => "c709abd5",
35
+ MurmurHash2 => "33f67c7e",
36
+ MurmurHash2A => "df25554b",
37
+ }.each do |c, should|
38
+ murmur = c.new
39
+ murmur.update("m").update("u").update("r")
40
+ murmur << "m" << "u" << "r"
41
+ murmur << "hash"
42
+ expect(murmur.hexdigest).to eq(should);
43
+ expect(murmur.hexdigest).to eq(should);
44
+ expect(murmur.hexdigest!).to eq(should);
45
+ expect(murmur.hexdigest).to eq("00000000");
46
+ end
37
47
  end
38
48
 
39
49
  it "==" do
40
- ["", "murmur", "murmurhash" * 1024].each do |str|
41
- murmur1 = Digest::MurmurHash.new
42
- murmur2 = Digest::MurmurHash.new
43
- expect(murmur1.update(str) == murmur2.update(str)).to be_true
50
+ all.each do |c|
51
+ ["", "murmur", "murmurhash" * 1024].each do |str|
52
+ murmur1 = c.new
53
+ murmur2 = c.new
54
+ expect(murmur1.update(str) == murmur2.update(str)).to be_true
55
+ end
44
56
  end
45
57
  end
46
58
 
47
- it "length" do
48
- expect(murmur.length).to eq(4);
59
+ it "dup" do
60
+ all.each do |c|
61
+ murmur1 = c.new
62
+ murmur2 = c.new
63
+ 10.times {
64
+ murmur1 = murmur1.update("murmurhash" * 100).dup
65
+ }
66
+ murmur2.update(("murmurhash" * 100) * 10)
67
+ expect(murmur1 == murmur2).to be_true
68
+ end
49
69
  end
50
70
 
51
- it "block_length" do
52
- # MurmurHash don't use block.
53
- # Therefore `block_length` return chunk size for calculate MurmurHash (equal 4)
54
- expect(murmur.block_length).to eq(4);
71
+ it "length" do
72
+ expect(MurmurHash1.new.length).to eq(4);
73
+ expect(MurmurHash2.new.length).to eq(4);
74
+ expect(MurmurHash2A.new.length).to eq(4);
55
75
  end
56
76
 
57
77
  it "to_i" do
58
- expect(murmur.update("murmurhash").to_i).to eq(0xc709abd5);
78
+ expect(MurmurHash1.new.update("murmurhash").to_i).to eq(0xc709abd5);
79
+ expect(MurmurHash2.new.update("murmurhash").to_i).to eq(0x33f67c7e);
80
+ expect(MurmurHash2A.new.update("murmurhash").to_i).to eq(0xdf25554b);
59
81
  end
60
82
  end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+
3
# Argument-error behaviour checked against each of the three hash classes.
describe MurmurHash do
  # Every hash class exercised by the examples below.
  let :all do
    [MurmurHash1, MurmurHash2, MurmurHash2A]
  end

  # Despite the example name, `update` is called with no argument at all.
  it "update nil" do
    all.each do |c|
      murmur = c.new
      expect{ murmur.update }.to raise_error(ArgumentError)
    end
  end

  # `rawdigest` called without data must raise ArgumentError.
  it "rawdigest no arguments" do
    all.each do |c|
      expect{ c.rawdigest }.to raise_error(ArgumentError)
    end
  end
end
+ end
data/spec/mem_spec.rb CHANGED
@@ -1,12 +1,18 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Digest::MurmurHash do
3
+ describe MurmurHash do
4
4
  it "gc safe" do
5
- murmur = Digest::MurmurHash.new
6
- GC.start
7
- murmur.update("murmur")
8
- GC.start
9
- expect(murmur.update("hash").to_s).to eq("c709abd5");
5
+ {
6
+ MurmurHash1 => "c709abd5",
7
+ MurmurHash2 => "33f67c7e",
8
+ MurmurHash2A => "df25554b",
9
+ }.each do |c, should|
10
+ murmur = c.new
11
+ GC.start
12
+ murmur.update("murmur")
13
+ GC.start
14
+ expect(murmur.update("hash").to_s).to eq(should);
15
+ end
10
16
  end
11
17
  end
12
18
 
data/spec/spec_helper.rb CHANGED
@@ -1 +1,2 @@
1
1
  require 'digest/murmurhash'
2
+ include Digest
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-murmurhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - ksss
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-02 00:00:00.000000000 Z
11
+ date: 2013-12-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,16 +58,30 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: 0.8.3
61
+ version: 0.9.2
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ~>
67
67
  - !ruby/object:Gem::Version
68
- version: 0.8.3
69
- description: Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1
70
- 32-bit) desiged by Austin Appleby.
68
+ version: 0.9.2
69
+ - !ruby/object:Gem::Dependency
70
+ name: digest-stringbuffer
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 0.0.2
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 0.0.2
83
+ description: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
84
+ by Austin Appleby.
71
85
  email: co000ri@gmail.com
72
86
  executables: []
73
87
  extensions:
@@ -82,10 +96,19 @@ files:
82
96
  - Rakefile
83
97
  - digest-murmurhash.gemspec
84
98
  - ext/digest/murmurhash/extconf.rb
85
- - ext/digest/murmurhash/murmurhash.c
99
+ - ext/digest/murmurhash/init.c
100
+ - ext/digest/murmurhash/murmurhash.h
101
+ - ext/digest/murmurhash/murmurhash1.c
102
+ - ext/digest/murmurhash/murmurhash1.h
103
+ - ext/digest/murmurhash/murmurhash2.c
104
+ - ext/digest/murmurhash/murmurhash2.h
105
+ - ext/digest/murmurhash/murmurhash2a.c
106
+ - ext/digest/murmurhash/murmurhash2a.h
107
+ - lib/digest/murmurhash.rb
86
108
  - lib/digest/murmurhash/version.rb
87
109
  - spec/bench.rb
88
110
  - spec/digest_spec.rb
111
+ - spec/exception_spec.rb
89
112
  - spec/mem_spec.rb
90
113
  - spec/spec_helper.rb
91
114
  homepage: ''
@@ -108,13 +131,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
131
  version: '0'
109
132
  requirements: []
110
133
  rubyforge_project:
111
- rubygems_version: 2.1.6
134
+ rubygems_version: 2.1.11
112
135
  signing_key:
113
136
  specification_version: 4
114
- summary: Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit)
115
- desiged by Austin Appleby.
137
+ summary: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
138
+ by Austin Appleby.
116
139
  test_files:
117
140
  - spec/bench.rb
118
141
  - spec/digest_spec.rb
142
+ - spec/exception_spec.rb
119
143
  - spec/mem_spec.rb
120
144
  - spec/spec_helper.rb
@@ -1,209 +0,0 @@
1
- #include "ruby.h"
2
- #ifdef HAVE_RUBY_DIGEST_H
3
- #include "ruby/digest.h"
4
- #else
5
- #include "digest.h"
6
- #endif
7
-
8
- #define MURMURHASH_DIGEST_LENGTH 4
9
- #define MURMURHASH_BLOCK_LENGTH 4
10
-
11
-
12
- ID id_concat;
13
-
14
- typedef struct {
15
- char* data;
16
- char* p;
17
- size_t memsize;
18
- } murmur_t;
19
-
20
- #define MURMURHASH(self, name) \
21
- murmur_t* name; \
22
- Data_Get_Struct(self, murmur_t, name); \
23
- if (name == NULL) { \
24
- rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
25
- }
26
-
27
- static void
28
- murmur_init(murmur_t* ptr)
29
- {
30
- ptr->data = (char*) malloc(sizeof(char) * 64);
31
- ptr->p = ptr->data;
32
- ptr->memsize = 64;
33
- }
34
-
35
- static void
36
- murmur_mark(murmur_t* ptr)
37
- {
38
- }
39
-
40
- static void
41
- murmur_free(murmur_t* ptr)
42
- {
43
- free(ptr->data);
44
- }
45
-
46
- static VALUE
47
- murmur_alloc(VALUE self)
48
- {
49
- murmur_t* ptr = ALLOC(murmur_t);
50
- murmur_init(ptr);
51
- return Data_Wrap_Struct(self, murmur_mark, murmur_free, ptr);
52
- }
53
-
54
- static VALUE
55
- murmur_initialize_copy(VALUE copy, VALUE origin)
56
- {
57
- murmur_t *ptr_copy, *ptr_origin;
58
- size_t data_len;
59
-
60
- if (copy == origin) return copy;
61
-
62
- rb_check_frozen(copy);
63
-
64
- Data_Get_Struct(copy, murmur_t, ptr_copy);
65
- Data_Get_Struct(origin, murmur_t, ptr_origin);
66
-
67
- data_len = ptr_origin->p - ptr_origin->data;
68
- ptr_copy->data = (char*) malloc(sizeof(char) * ptr_origin->memsize);
69
- memcpy(ptr_copy->data, ptr_origin->data, data_len);
70
- ptr_copy->p = ptr_copy->data + data_len;
71
- ptr_copy->memsize = ptr_origin->memsize;
72
-
73
- return copy;
74
- }
75
-
76
- static VALUE
77
- murmur_reset(VALUE self)
78
- {
79
- MURMURHASH(self, ptr);
80
- ptr->p = ptr->data;
81
- return self;
82
- }
83
-
84
- static VALUE
85
- murmur_update(VALUE self, VALUE str)
86
- {
87
- size_t data_len, str_len, require, newsize;
88
- const char* str_p;
89
- MURMURHASH(self, ptr);
90
-
91
- StringValue(str);
92
- str_p = RSTRING_PTR(str);
93
- str_len = RSTRING_LEN(str);
94
- data_len = (ptr->p - ptr->data);
95
- require = data_len + str_len;
96
- if (ptr->memsize < require) {
97
- newsize = ptr->memsize;
98
- while (newsize < require) {
99
- newsize *= 2;
100
- }
101
- ptr->data = realloc(ptr->data, sizeof(char) * newsize);
102
- ptr->p = ptr->data + data_len;
103
- ptr->memsize = newsize;
104
- }
105
- memcpy(ptr->p, str_p, str_len);
106
- ptr->p += str_len;
107
-
108
- return self;
109
- }
110
-
111
- static uint32_t
112
- murmur_hash_process(murmur_t* ptr)
113
- {
114
- const uint32_t m = 0x5bd1e995;
115
- const uint8_t r = 16;
116
- uint32_t length, h;
117
- const char* p;
118
-
119
- p = ptr->data;
120
- length = ptr->p - ptr->data;
121
- h = length * m;
122
-
123
- while (4 <= length) {
124
- h += *(uint32_t*)p;
125
- h *= m;
126
- h ^= h >> r;
127
- p += 4;
128
- length -= 4;
129
- }
130
-
131
- switch (length) {
132
- case 3:
133
- h += p[2] << 16;
134
- case 2:
135
- h += p[1] << 8;
136
- case 1:
137
- h += p[0];
138
- h *= m;
139
- h ^= h >> r;
140
- }
141
-
142
- h *= m;
143
- h ^= h >> 10;
144
- h *= m;
145
- h ^= h >> 17;
146
-
147
- return h;
148
- }
149
-
150
- static VALUE
151
- murmur_finish(VALUE self)
152
- {
153
- uint32_t h;
154
- uint8_t digest[MURMURHASH_DIGEST_LENGTH];
155
- MURMURHASH(self, ptr);
156
-
157
- h = murmur_hash_process(ptr);
158
-
159
- digest[0] = (h >> 24);
160
- digest[1] = (h >> 16);
161
- digest[2] = (h >> 8);
162
- digest[3] = (h);
163
-
164
- return rb_str_new((const char*) digest, 4);
165
- }
166
-
167
- static VALUE
168
- murmur_digest_length(VALUE self)
169
- {
170
- return INT2NUM(MURMURHASH_DIGEST_LENGTH);
171
- }
172
-
173
- static VALUE
174
- murmur_block_length(VALUE self)
175
- {
176
- return INT2NUM(MURMURHASH_BLOCK_LENGTH);
177
- }
178
-
179
- static VALUE
180
- murmur_to_i(VALUE self)
181
- {
182
- MURMURHASH(self, ptr);
183
- return UINT2NUM(murmur_hash_process(ptr));
184
- }
185
-
186
- void
187
- Init_murmurhash()
188
- {
189
- VALUE mDigest, cDigest_Base, cDigest_MurmurHash;
190
-
191
- id_concat = rb_intern("concat");
192
-
193
- rb_require("digest");
194
-
195
- mDigest = rb_path2class("Digest");
196
- cDigest_Base = rb_path2class("Digest::Base");
197
-
198
- cDigest_MurmurHash = rb_define_class_under(mDigest, "MurmurHash", cDigest_Base);
199
-
200
- rb_define_alloc_func(cDigest_MurmurHash, murmur_alloc);
201
- rb_define_method(cDigest_MurmurHash, "initialize_copy", murmur_initialize_copy, 1);
202
- rb_define_method(cDigest_MurmurHash, "reset", murmur_reset, 0);
203
- rb_define_method(cDigest_MurmurHash, "update", murmur_update, 1);
204
- rb_define_private_method(cDigest_MurmurHash, "finish", murmur_finish, 0);
205
- rb_define_method(cDigest_MurmurHash, "digest_length", murmur_digest_length, 0);
206
- rb_define_method(cDigest_MurmurHash, "block_length", murmur_block_length, 0);
207
-
208
- rb_define_method(cDigest_MurmurHash, "to_i", murmur_to_i, 0);
209
- }