digest-murmurhash 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f158d21ca468ee7cccb9c39ba6fc57660850edd
4
- data.tar.gz: 82730d511f8136b2ff960c0d2d99a62c9a78c339
3
+ metadata.gz: 635a6c34802609c512789d68c9ebe571df48cc01
4
+ data.tar.gz: 1a1884fb59d6a9ccfca3c5da969e688589c093f0
5
5
  SHA512:
6
- metadata.gz: 4494bc8ee54eb5bc525c9c8d5a8f275c299b73350fd0f6fcb638e7272fb546cab8d5bd5867cab5f388845ddc2eeec11302c8d982ea1e0e5bb33cc2b50b127d28
7
- data.tar.gz: 20cc99086cc26e5ba616a9d179aa11121f88704a8effb6a417dd3cc58032b8c0fb89a21818c1bf790bfd7dde3b94a0477cd67d74d0fe860c882e339ddfd3c1b8
6
+ metadata.gz: 2633494b6b099a2149deeb3c39af1eb81e80435156bc459b4aee9498ca5209f41ac5a400713b6ae6c762935c5a1d964bc408e63a6e21e0634778d4a78c63f329
7
+ data.tar.gz: 9c61561046045532d0c8fe863059625cb532d051ea978f16fc63b0bd7fada9fded44900d91642695253d50fd94344c9444d35d5e6715b2956c2078713bcd9fd2
data/README.md CHANGED
@@ -2,26 +2,39 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/ksss/digest-murmurhash.png?branch=master)](https://travis-ci.org/ksss/digest-murmurhash)
4
4
 
5
- Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit) desiged by Austin Appleby.
5
+ **MurmurHash** is an algorithm designed by Austin Appleby.
6
6
 
7
- Digest::MurmurHash compliance Digest API of Ruby.
7
+ **Digest::MurmurHash1** uses the MurmurHash1 (32-bit) algorithm.
8
8
 
9
- ## Usage
9
+ **Digest::MurmurHash2** uses the MurmurHash2 (32-bit) algorithm.
10
+
11
+ (**Digest::MurmurHash** exists only to hold the version number.)
10
12
 
11
- All interface compliance Digest::Base.
13
+ All classes comply with Ruby's Digest API.
14
+
15
+ ## Usage
12
16
 
13
- You can use same interface built in Digest::XXX classes.
17
+ You can use the same interface as the built-in Digest::XXX classes.
14
18
 
15
19
  ```ruby
16
20
  require 'digest/murmurhash'
17
21
 
18
- p Digest::MurmurHash.hexdigest('murmurhash') #=> 'c709abd5'
19
- p Digest::MurmurHash.file("./LICENSE.txt").hexdigest #=> '712e9641'
22
+ # MurmurHash1 can be used in the same way as Digest::XXX.
23
+
24
+ p Digest::MurmurHash1.hexdigest('murmurhash') #=> 'c709abd5'
25
+ p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> '712e9641'
26
+
27
+ # MurmurHash2 works the same way, but returns a different value because it uses a different algorithm.
28
+
29
+ p Digest::MurmurHash2.hexdigest('murmurhash') #=> '33f67c7e'
30
+ p Digest::MurmurHash2.file("./LICENSE.txt").hexdigest #=> '78678326'
20
31
  ```
21
32
 
22
33
  ## Class tree
23
34
 
24
- Digest::MurmurHash < Digest::Base < Digest::Instance < Digest::Class < Object < Kernel < BasicObject
35
+ **Digest::MurmurHash1** < Digest::StringBuffer
36
+
37
+ **Digest::MurmurHash2** < Digest::StringBuffer
25
38
 
26
39
  ## Installation
27
40
 
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ require 'rake/extensiontask'
14
14
  spec = Bundler::GemHelper.gemspec
15
15
  Rake::ExtensionTask.new('murmurhash', spec) do |ext|
16
16
  ext.ext_dir = 'ext/digest/murmurhash'
17
- ext.lib_dir = 'lib/digest'
17
+ ext.lib_dir = 'lib/digest/murmurhash'
18
18
  end
19
19
 
20
20
 
@@ -1,16 +1,15 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- # require 'digest/murmurhash/version'
4
+ require 'digest/murmurhash/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "digest-murmurhash"
8
- # FIXME replace Digest::MurmurHash::VERSION but raise NameError
9
- spec.version = "0.1.0"
8
+ spec.version = Digest::MurmurHash::VERSION
10
9
  spec.author = "ksss"
11
10
  spec.email = "co000ri@gmail.com"
12
- spec.description = %q{Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit) desiged by Austin Appleby.}
13
- spec.summary = %q{Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit) desiged by Austin Appleby.}
11
+ spec.description = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
12
+ spec.summary = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
14
13
  spec.homepage = ""
15
14
  spec.license = "MIT"
16
15
 
@@ -23,5 +22,6 @@ Gem::Specification.new do |spec|
23
22
  spec.add_development_dependency "bundler", "~> 1.3"
24
23
  spec.add_development_dependency "rake"
25
24
  spec.add_development_dependency "rspec", ['~> 2.11']
26
- spec.add_development_dependency "rake-compiler", ["~> 0.8.3"]
25
+ spec.add_development_dependency "rake-compiler", ["~> 0.9.2"]
26
+ spec.add_development_dependency "digest-stringbuffer", ["~> 0.0.2"]
27
27
  end
@@ -4,4 +4,4 @@ have_header('ruby/digest.h')
4
4
 
5
5
  $preload = %w[digest]
6
6
 
7
- create_makefile('digest/murmurhash')
7
+ create_makefile('digest/murmurhash/murmurhash')
@@ -0,0 +1,34 @@
1
+ #include "murmurhash.h"
2
+ #include "murmurhash1.h"
3
+ #include "murmurhash2.h"
4
+ #include "murmurhash2a.h"
5
+
6
+ void
7
+ Init_murmurhash(void)
8
+ {
9
+ VALUE mDigest, cDigest_StringBuffer;
10
+ VALUE cDigest_MurmurHash1, cDigest_MurmurHash2, cDigest_MurmurHash2A;
11
+
12
+ /* Digest::MurmurHash is require that Digest::StringBuffer */
13
+ rb_require("digest/stringbuffer");
14
+ mDigest = rb_path2class("Digest");
15
+ cDigest_StringBuffer = rb_path2class("Digest::StringBuffer");
16
+
17
+ /* class Digest::MurmurHash1 < Digest::StringBuffer */
18
+ cDigest_MurmurHash1 = rb_define_class_under(mDigest, "MurmurHash1", cDigest_StringBuffer);
19
+ rb_define_private_method(cDigest_MurmurHash1, "finish", murmur1_finish, 0);
20
+ rb_define_method(cDigest_MurmurHash1, "to_i", murmur1_to_i, 0);
21
+ rb_define_singleton_method(cDigest_MurmurHash1, "rawdigest", murmur1_s_rawdigest, -1);
22
+
23
+ /* class Digest::MurmurHash2 < Digest::StringBuffer */
24
+ cDigest_MurmurHash2 = rb_define_class_under(mDigest, "MurmurHash2", cDigest_StringBuffer);
25
+ rb_define_private_method(cDigest_MurmurHash2, "finish", murmur2_finish, 0);
26
+ rb_define_method(cDigest_MurmurHash2, "to_i", murmur2_to_i, 0);
27
+ rb_define_singleton_method(cDigest_MurmurHash2, "rawdigest", murmur2_s_rawdigest, -1);
28
+
29
+ /* class Digest::MurmurHash2A < Digest::StringBuffer */
30
+ cDigest_MurmurHash2A = rb_define_class_under(mDigest, "MurmurHash2A", cDigest_StringBuffer);
31
+ rb_define_private_method(cDigest_MurmurHash2A, "finish", murmur2a_finish, 0);
32
+ rb_define_method(cDigest_MurmurHash2A, "to_i", murmur2a_to_i, 0);
33
+ rb_define_singleton_method(cDigest_MurmurHash2A, "rawdigest", murmur2a_s_rawdigest, -1);
34
+ }
@@ -0,0 +1,23 @@
1
+ #ifndef MURMURHASH_INCLUDED
2
+ # define MURMURHASH_INCLUDED
3
+
4
+ #include "ruby.h"
5
+
6
+ #define MURMURHASH_MAGIC 0x5bd1e995
7
+
/*
 * Byte buffer wrapped by the Ruby objects this extension operates on.
 *
 * NOTE(review): this has to stay field-for-field identical to the structure
 * used by digest/stringbuffer, because instances are allocated there and only
 * read here -- confirm against that gem whenever either side changes.
 */
typedef struct {
    char *buffer;    /* first byte of the accumulated input */
    char *p;         /* (p - buffer) is the number of bytes to hash */
    size_t memsize;  /* presumably the allocated capacity of buffer -- confirm */
} buffer_t;

/*
 * Declare `buffer_t *name` in the current scope and point it at the struct
 * wrapped by the Ruby object `self`; raises ArgumentError if the wrapped
 * pointer is NULL.
 *
 * Because it expands to a declaration followed by statements, it must be
 * used where a declaration is allowed and cannot be wrapped in
 * do { ... } while (0).
 */
#define MURMURHASH(self, name) \
    buffer_t *name; \
    Data_Get_Struct(self, buffer_t, name); \
    if (name == NULL) { \
        rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be."); \
    }
21
+
22
+ #endif /* ifndef MURMURHASH_INCLUDED */
23
+
@@ -0,0 +1,86 @@
1
+ /*
2
+ * MurmurHash1 (C) Austin Appleby
3
+ */
4
+
5
+ #include "murmurhash1.h"
6
+
/*
 * One MurmurHash1 mixing round: multiply by the magic constant, then fold
 * the upper bits down by `r`.
 */
static inline uint32_t
murmur1(uint32_t h, const uint8_t r)
{
    const uint32_t m = MURMURHASH_MAGIC;

    h *= m;
    h ^= h >> r;
    return h;
}

/*
 * Compute the 32-bit MurmurHash1 of the `length` bytes starting at `data`.
 *
 * The state starts at length * magic.  Whole 4-byte words are read in host
 * byte order, so the result depends on the machine's endianness.
 *
 * The 1-3 trailing bytes are read as unsigned values.  Reading them through
 * plain `char` sign-extended every byte >= 0x80 on signed-char platforms
 * (and left-shifted a negative int, which is undefined), making the hash
 * platform-dependent for non-ASCII input.
 */
static uint32_t
murmur_hash_process1(const char *data, uint32_t length)
{
    const uint32_t m = MURMURHASH_MAGIC;
    const uint8_t r = 16;
    const unsigned char *bytes = (const unsigned char *)data;
    uint32_t h = length * m;

    for (; 4 <= length; length -= 4, bytes += 4) {
        /* NOTE(review): direct word load -- assumes the platform tolerates
         * this access for the buffers handed in; kept as in the original. */
        h += *(const uint32_t *)bytes;
        h = murmur1(h, r);
    }

    switch (length) {
    case 3:
        h += (uint32_t)bytes[2] << 16;
        /* fallthrough */
    case 2:
        h += (uint32_t)bytes[1] << 8;
        /* fallthrough */
    case 1:
        h += bytes[0];
        h = murmur1(h, r);
        break;
    default:
        break;
    }

    /* Final avalanche. */
    h = murmur1(h, 10);
    h = murmur1(h, 17);

    return h;
}
47
+
48
+ VALUE
49
+ murmur1_finish(VALUE self)
50
+ {
51
+ uint32_t h;
52
+ uint8_t digest[4];
53
+ MURMURHASH(self, ptr);
54
+
55
+ h = murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer);
56
+
57
+ digest[0] = h >> 24;
58
+ digest[1] = h >> 16;
59
+ digest[2] = h >> 8;
60
+ digest[3] = h;
61
+
62
+ return rb_str_new((const char*) digest, 4);
63
+ }
64
+
65
+ VALUE
66
+ murmur1_to_i(VALUE self)
67
+ {
68
+ MURMURHASH(self, ptr);
69
+ return UINT2NUM(murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer));
70
+ }
71
+
72
+ VALUE
73
+ murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass)
74
+ {
75
+ VALUE str;
76
+
77
+ if (argc < 1)
78
+ rb_raise(rb_eArgError, "no data given");
79
+
80
+ str = *argv++;
81
+ argc--;
82
+
83
+ StringValue(str);
84
+
85
+ return UINT2NUM(murmur_hash_process1(RSTRING_PTR(str), RSTRING_LEN(str)));
86
+ }
@@ -0,0 +1,11 @@
1
+ #ifndef MURMURHASH1_INCLUDED
2
+ # define MURMURHASH1_INCLUDED
3
+
4
+ #include "murmurhash.h"
5
+
6
+ VALUE murmur1_finish(VALUE self);
7
+ VALUE murmur1_to_i(VALUE self);
8
+ VALUE murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass);
9
+
10
+ #endif /* ifndef MURMURHASH1_INCLUDED */
11
+
@@ -0,0 +1,90 @@
1
+ /*
2
+ * MurmurHash2 (C) Austin Appleby
3
+ */
4
+
5
+ #include "murmurhash2.h"
6
+
/*
 * One MurmurHash2 mixing round: scramble the input word `k` and fold it into
 * the running state `h`.  Returns the new state.
 */
static inline uint32_t
murmur2(uint32_t h, uint32_t k, const uint8_t r)
{
    const uint32_t m = MURMURHASH_MAGIC;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;
    return h;
}

/*
 * Compute the 32-bit hash of the `length` bytes starting at `data`.
 *
 * Whole 4-byte words are read in host byte order, so the result depends on
 * the machine's endianness.
 *
 * The 1-3 trailing bytes are read as unsigned values.  Reading them through
 * plain `char` sign-extended every byte >= 0x80 on signed-char platforms
 * (and left-shifted a negative int, which is undefined), making the hash
 * platform-dependent for non-ASCII input.
 *
 * NOTE(review): the state is initialised to length * magic; Austin Appleby's
 * reference MurmurHash2 is believed to start from seed ^ length.  Left
 * unchanged because existing digests depend on it -- confirm whether the
 * difference is intentional.
 */
static uint32_t
murmur_hash_process2(const char *data, uint32_t length)
{
    const uint32_t m = MURMURHASH_MAGIC;
    const uint8_t r = 24;
    const unsigned char *bytes = (const unsigned char *)data;
    uint32_t h = length * m;

    for (; 4 <= length; length -= 4, bytes += 4) {
        /* NOTE(review): direct word load -- assumes the platform tolerates
         * this access for the buffers handed in; kept as in the original. */
        h = murmur2(h, *(const uint32_t *)bytes, r);
    }

    switch (length) {
    case 3:
        h ^= (uint32_t)bytes[2] << 16;
        /* fallthrough */
    case 2:
        h ^= (uint32_t)bytes[1] << 8;
        /* fallthrough */
    case 1:
        h ^= bytes[0];
        h *= m;
        break;
    default:
        break;
    }

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
49
+
50
+ VALUE
51
+ murmur2_finish(VALUE self)
52
+ {
53
+ uint32_t h;
54
+ uint8_t digest[4];
55
+ MURMURHASH(self, ptr);
56
+
57
+ h = murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer);
58
+
59
+ digest[0] = h >> 24;
60
+ digest[1] = h >> 16;
61
+ digest[2] = h >> 8;
62
+ digest[3] = h;
63
+
64
+ return rb_str_new((const char*) digest, 4);
65
+ }
66
+
67
+ VALUE
68
+ murmur2_to_i(VALUE self)
69
+ {
70
+ MURMURHASH(self, ptr);
71
+ return UINT2NUM(murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer));
72
+ }
73
+
74
+ VALUE
75
+ murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
76
+ {
77
+ VALUE str;
78
+ volatile VALUE obj;
79
+
80
+ if (argc < 1)
81
+ rb_raise(rb_eArgError, "no data given");
82
+
83
+ str = *argv++;
84
+ argc--;
85
+
86
+ StringValue(str);
87
+
88
+ return UINT2NUM(murmur_hash_process2(RSTRING_PTR(str), RSTRING_LEN(str)));
89
+ }
90
+
@@ -0,0 +1,11 @@
1
+ #ifndef MURMURHASH2_INCLUDED
2
+ # define MURMURHASH2_INCLUDED
3
+
4
+ #include "murmurhash.h"
5
+
6
+ VALUE murmur2_finish(VALUE self);
7
+ VALUE murmur2_to_i(VALUE self);
8
+ VALUE murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
9
+
10
+ #endif /* ifndef MURMURHASH2_INCLUDED */
11
+
@@ -0,0 +1,84 @@
1
+ /*
2
+ * MurmurHash2A (C) Austin Appleby
3
+ */
4
+
5
+ #include "murmurhash2a.h"
6
+
/*
 * One MurmurHash2A mixing round: scramble the word `k` and fold it into the
 * running state `h`.  Returns the new state.
 *
 * Replaces the former function-like `mmix` macro, which modified both of its
 * arguments in place and silently picked up `m` and `r` from the caller.
 */
static inline uint32_t
murmur2a_mix(uint32_t h, uint32_t k, const uint8_t r)
{
    const uint32_t m = MURMURHASH_MAGIC;

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;
    return h;
}

/*
 * Compute the 32-bit MurmurHash2A of the `length` bytes starting at `key`,
 * with the state initialised from a zero seed XOR the length.
 *
 * Whole 4-byte words are read in host byte order, so the result depends on
 * the machine's endianness.  The 0-3 trailing bytes are packed into one
 * word and mixed in, followed by the original length, before the final
 * avalanche.
 */
static uint32_t
murmur_hash_process2a(const void *key, uint32_t length)
{
    const uint32_t m = MURMURHASH_MAGIC;
    const uint8_t r = 24;
    const unsigned char *data = (const unsigned char *)key;
    const uint32_t total = length;  /* mixed in at the end; `length` is consumed below */
    uint32_t h = 0 ^ length;
    uint32_t tail = 0;

    for (; 4 <= length; length -= 4, data += 4) {
        /* NOTE(review): direct word load -- assumes the platform tolerates
         * this access for the buffers handed in; kept as in the original. */
        h = murmur2a_mix(h, *(const uint32_t *)data, r);
    }

    switch (length) {
    case 3:
        tail ^= (uint32_t)data[2] << 16;
        /* fallthrough */
    case 2:
        tail ^= (uint32_t)data[1] << 8;
        /* fallthrough */
    case 1:
        tail ^= data[0];
        break;
    default:
        break;
    }

    h = murmur2a_mix(h, tail, r);
    h = murmur2a_mix(h, total, r);

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
43
+
44
+ VALUE
45
+ murmur2a_finish(VALUE self)
46
+ {
47
+ uint32_t h;
48
+ uint8_t digest[4];
49
+ MURMURHASH(self, ptr);
50
+
51
+ h = murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer);
52
+
53
+ digest[0] = h >> 24;
54
+ digest[1] = h >> 16;
55
+ digest[2] = h >> 8;
56
+ digest[3] = h;
57
+
58
+ return rb_str_new((const char*) digest, 4);
59
+ }
60
+
61
+ VALUE
62
+ murmur2a_to_i(VALUE self)
63
+ {
64
+ MURMURHASH(self, ptr);
65
+ return UINT2NUM(murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer));
66
+ }
67
+
68
+ VALUE
69
+ murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
70
+ {
71
+ VALUE str;
72
+ volatile VALUE obj;
73
+
74
+ if (argc < 1)
75
+ rb_raise(rb_eArgError, "no data given");
76
+
77
+ str = *argv++;
78
+ argc--;
79
+
80
+ StringValue(str);
81
+
82
+ return UINT2NUM(murmur_hash_process2a(RSTRING_PTR(str), RSTRING_LEN(str)));
83
+ }
84
+
@@ -0,0 +1,11 @@
1
+ #ifndef MURMURHASH2A_INCLUDED
2
+ # define MURMURHASH2A_INCLUDED
3
+
4
+ #include "murmurhash.h"
5
+
6
+ VALUE murmur2a_finish(VALUE self);
7
+ VALUE murmur2a_to_i(VALUE self);
8
+ VALUE murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
9
+
10
+ #endif /* ifndef MURMURHASH2A_INCLUDED */
11
+
@@ -0,0 +1,6 @@
# Load the compiled extension.  A build stored under a directory named after
# the running Ruby's "major.minor" version is tried first; if that cannot be
# loaded, fall back to the extension at the default location.
begin
  # The dot is escaped so that only a literal "." separates the two numbers.
  require "digest/murmurhash/#{RUBY_VERSION[/\d+\.\d+/]}/murmurhash"
rescue LoadError
  require "digest/murmurhash/murmurhash"
end
require "digest/murmurhash/version"
@@ -1,5 +1,5 @@
1
1
  module Digest
2
2
  class MurmurHash
3
- VERSION = "0.1.0"
3
+ VERSION = "0.2.1"
4
4
  end
5
5
  end
data/spec/bench.rb CHANGED
@@ -3,9 +3,24 @@
3
3
  lib = File.expand_path('../../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
+ require 'digest/stringbuffer'
6
7
  require 'digest/murmurhash'
7
8
  require 'benchmark'
8
9
 
# Trivial comparison "digest" used by the benchmark: every buffered byte is
# multiplied by 37, the products are summed, and the low 32 bits of the sum
# are returned as a 4-byte big-endian string.
class Prime37 < Digest::StringBuffer
  def initialize
    @prime = 37
  end

  def finish
    total = buffer.unpack("C*").inject(0) { |sum, byte| sum + (byte * @prime) }
    [total & 0xffffffff].pack("N")
  end
end
23
+
9
24
  class Integer
10
25
  def to_32
11
26
  self & 0xffffffff
@@ -44,30 +59,82 @@ def murmur_hash str
44
59
  h = (h * m).to_32
45
60
  h ^= h >> 17
46
61
 
47
- h.to_32
62
+ h
48
63
  end
49
64
 
65
+ @rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
50
66
  def rand_str
51
67
  rand = "";
52
- File.open("/dev/urandom").read(20).each_byte{|x| rand << sprintf("%02x",x)}
68
+ 20.times {
69
+ rand << @rands[rand(62)]
70
+ }
53
71
  rand
54
72
  end
55
73
 
56
- s = rand_str
57
- p [murmur_hash(s)].pack("N")
58
- p Digest::MurmurHash.digest(s)
74
+ n = 100000
75
+ times_enum = n.times
76
+
77
+ a = Array.new(n, 0)
78
+ n.times do |i|
79
+ a[i] = rand_str
80
+ end
81
+
82
+ c = Struct.new "Cases",
83
+ :name,
84
+ :func
85
+ cases = [
86
+ c.new("pureRuby", proc{|x| murmur_hash x }),
87
+ c.new("Prime37", proc{|x| Prime37.digest x }),
88
+ c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x }),
89
+ c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x }),
90
+ c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x }),
91
+ ]
92
+
93
+ reals = {}
94
+ confrict = {}
95
+ confricts = {}
59
96
 
97
+ puts "### condition"
98
+ puts
99
+ puts " RUBY_VERSION = #{RUBY_VERSION}"
100
+ puts " count = #{n}"
101
+ puts
102
+ puts "### benchmark"
103
+ puts
104
+ puts "```"
60
105
  Benchmark.bm do |x|
61
- n = 10000
62
- a = []
63
- n.times { |i|
64
- a[i] = rand_str
65
- }
106
+ cases.each do |c|
107
+ z = x.report c.name do
108
+ times_enum.each do |i|
109
+ c.func.call(a[i])
110
+ end
111
+ end
112
+
113
+ confrict.clear
114
+ times_enum.each do |i|
115
+ rethash = c.func.call(a[i])
116
+ if confrict[rethash].nil?
117
+ confrict[rethash] = 0
118
+ else
119
+ confrict[rethash] += 1
120
+ end
121
+ end
122
+ reals[c.name] = z.real
123
+ confricts[c.name] = confrict.count{|hash, count| 0 < count}
124
+ end
125
+ end
126
+ puts "```"
127
+
128
+ puts
129
+ puts "### real second rate (pureRuby/)"
130
+ puts
131
+ reals.each do |name, real|
132
+ puts " " + (reals["pureRuby"] / real).to_s + "/" + name
133
+ end
66
134
 
67
- x.report {n.times{ |i|
68
- [murmur_hash(a[i])].pack("N")
69
- }}
70
- x.report {n.times{ |i|
71
- Digest::MurmurHash.digest(a[i])
72
- }}
135
+ puts
136
+ puts "### confrict count (/#{n})"
137
+ puts
138
+ confricts.each do |name, count|
139
+ puts " #{name}: #{count}"
73
140
  end
data/spec/digest_spec.rb CHANGED
@@ -1,60 +1,82 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Digest::MurmurHash do
4
- let :murmur do
5
- Digest::MurmurHash.new
3
+ describe "Digest::MurmurHash1 and 2" do
4
+ let :all do
5
+ [MurmurHash1, MurmurHash2, MurmurHash2A]
6
6
  end
7
7
 
8
8
  it "initialize" do
9
- expect(Digest::MurmurHash.new).to be_a_kind_of(Digest::Base)
9
+ expect(MurmurHash1.new).to be_a_kind_of(Digest::StringBuffer)
10
+ expect(MurmurHash2.new).to be_a_kind_of(Digest::StringBuffer)
11
+ expect(MurmurHash2A.new).to be_a_kind_of(Digest::StringBuffer)
12
+ end
13
+
14
+ it "digest" do
15
+ expect(MurmurHash1.digest("a" * 1024)).to eq("\xa1\x52\x2e\x5f".force_encoding("ASCII-8BIT"))
16
+ expect(MurmurHash2.digest("a" * 1024)).to eq("\xd0\x0c\x31\x2f".force_encoding("ASCII-8BIT"))
17
+ expect(MurmurHash2A.digest("a" * 1024)).to eq("\xd5\x2d\xb1\x67".force_encoding("ASCII-8BIT"))
10
18
  end
11
19
 
12
20
  it "hexdigest" do
13
- expect(Digest::MurmurHash.hexdigest("a" * 1024)).to eq("a1522e5f")
21
+ expect(MurmurHash1.hexdigest("a" * 1024)).to eq("a1522e5f")
22
+ expect(MurmurHash2.hexdigest("a" * 1024)).to eq("d00c312f")
23
+ expect(MurmurHash2A.hexdigest("a" * 1024)).to eq("d52db167")
14
24
  end
15
25
 
16
- it "update and reset and hexdigest" do
17
- murmur.update("m")
18
- murmur.update("u")
19
- murmur.update("r")
20
- murmur.update("m")
21
- murmur.update("u")
22
- murmur.update("r")
23
- murmur.update("hash")
24
- expect(murmur.hexdigest).to eq("c709abd5");
25
- expect(murmur.hexdigest).to eq("c709abd5");
26
- expect(murmur.hexdigest!).to eq("c709abd5");
27
- expect(murmur.hexdigest).to eq("00000000");
26
+ it "rawdigest" do
27
+ expect(MurmurHash1.rawdigest("a" * 1024)).to eq(0xa1522e5f)
28
+ expect(MurmurHash2.rawdigest("a" * 1024)).to eq(0xd00c312f)
29
+ expect(MurmurHash2A.rawdigest("a" * 1024)).to eq(0xd52db167)
28
30
  end
29
31
 
30
- it "dup" do
31
- murmur1 = Digest::MurmurHash.new
32
- murmur2 = Digest::MurmurHash.new
33
- murmur1.update("murmur")
34
- murmur2 = murmur1.dup
35
- murmur2.update("hash")
36
- expect(murmur2.hexdigest).to eq("c709abd5")
32
+ it "update and reset and hexdigest" do
33
+ {
34
+ MurmurHash1 => "c709abd5",
35
+ MurmurHash2 => "33f67c7e",
36
+ MurmurHash2A => "df25554b",
37
+ }.each do |c, should|
38
+ murmur = c.new
39
+ murmur.update("m").update("u").update("r")
40
+ murmur << "m" << "u" << "r"
41
+ murmur << "hash"
42
+ expect(murmur.hexdigest).to eq(should);
43
+ expect(murmur.hexdigest).to eq(should);
44
+ expect(murmur.hexdigest!).to eq(should);
45
+ expect(murmur.hexdigest).to eq("00000000");
46
+ end
37
47
  end
38
48
 
39
49
  it "==" do
40
- ["", "murmur", "murmurhash" * 1024].each do |str|
41
- murmur1 = Digest::MurmurHash.new
42
- murmur2 = Digest::MurmurHash.new
43
- expect(murmur1.update(str) == murmur2.update(str)).to be_true
50
+ all.each do |c|
51
+ ["", "murmur", "murmurhash" * 1024].each do |str|
52
+ murmur1 = c.new
53
+ murmur2 = c.new
54
+ expect(murmur1.update(str) == murmur2.update(str)).to be_true
55
+ end
44
56
  end
45
57
  end
46
58
 
47
- it "length" do
48
- expect(murmur.length).to eq(4);
59
+ it "dup" do
60
+ all.each do |c|
61
+ murmur1 = c.new
62
+ murmur2 = c.new
63
+ 10.times {
64
+ murmur1 = murmur1.update("murmurhash" * 100).dup
65
+ }
66
+ murmur2.update(("murmurhash" * 100) * 10)
67
+ expect(murmur1 == murmur2).to be_true
68
+ end
49
69
  end
50
70
 
51
- it "block_length" do
52
- # MurmurHash don't use block.
53
- # Therefore `block_length` return chunk size for calculate MurmurHash (equal 4)
54
- expect(murmur.block_length).to eq(4);
71
+ it "length" do
72
+ expect(MurmurHash1.new.length).to eq(4);
73
+ expect(MurmurHash2.new.length).to eq(4);
74
+ expect(MurmurHash2A.new.length).to eq(4);
55
75
  end
56
76
 
57
77
  it "to_i" do
58
- expect(murmur.update("murmurhash").to_i).to eq(0xc709abd5);
78
+ expect(MurmurHash1.new.update("murmurhash").to_i).to eq(0xc709abd5);
79
+ expect(MurmurHash2.new.update("murmurhash").to_i).to eq(0x33f67c7e);
80
+ expect(MurmurHash2A.new.update("murmurhash").to_i).to eq(0xdf25554b);
59
81
  end
60
82
  end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+
# Argument-error behaviour shared by every MurmurHash variant.
describe MurmurHash do
  let(:all) { [MurmurHash1, MurmurHash2, MurmurHash2A] }

  # Calling #update with no argument at all must raise.
  it "update nil" do
    all.each do |klass|
      instance = klass.new
      expect { instance.update }.to raise_error(ArgumentError)
    end
  end

  # Calling .rawdigest with no argument at all must raise.
  it "rawdigest no arguments" do
    all.each do |klass|
      expect { klass.rawdigest }.to raise_error(ArgumentError)
    end
  end
end
data/spec/mem_spec.rb CHANGED
@@ -1,12 +1,18 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe Digest::MurmurHash do
3
+ describe MurmurHash do
4
4
  it "gc safe" do
5
- murmur = Digest::MurmurHash.new
6
- GC.start
7
- murmur.update("murmur")
8
- GC.start
9
- expect(murmur.update("hash").to_s).to eq("c709abd5");
5
+ {
6
+ MurmurHash1 => "c709abd5",
7
+ MurmurHash2 => "33f67c7e",
8
+ MurmurHash2A => "df25554b",
9
+ }.each do |c, should|
10
+ murmur = c.new
11
+ GC.start
12
+ murmur.update("murmur")
13
+ GC.start
14
+ expect(murmur.update("hash").to_s).to eq(should);
15
+ end
10
16
  end
11
17
  end
12
18
 
data/spec/spec_helper.rb CHANGED
@@ -1 +1,2 @@
1
1
  require 'digest/murmurhash'
2
+ include Digest
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digest-murmurhash
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - ksss
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-02 00:00:00.000000000 Z
11
+ date: 2013-12-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -58,16 +58,30 @@ dependencies:
58
58
  requirements:
59
59
  - - ~>
60
60
  - !ruby/object:Gem::Version
61
- version: 0.8.3
61
+ version: 0.9.2
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ~>
67
67
  - !ruby/object:Gem::Version
68
- version: 0.8.3
69
- description: Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1
70
- 32-bit) desiged by Austin Appleby.
68
+ version: 0.9.2
69
+ - !ruby/object:Gem::Dependency
70
+ name: digest-stringbuffer
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ version: 0.0.2
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ~>
81
+ - !ruby/object:Gem::Version
82
+ version: 0.0.2
83
+ description: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
84
+ by Austin Appleby.
71
85
  email: co000ri@gmail.com
72
86
  executables: []
73
87
  extensions:
@@ -82,10 +96,19 @@ files:
82
96
  - Rakefile
83
97
  - digest-murmurhash.gemspec
84
98
  - ext/digest/murmurhash/extconf.rb
85
- - ext/digest/murmurhash/murmurhash.c
99
+ - ext/digest/murmurhash/init.c
100
+ - ext/digest/murmurhash/murmurhash.h
101
+ - ext/digest/murmurhash/murmurhash1.c
102
+ - ext/digest/murmurhash/murmurhash1.h
103
+ - ext/digest/murmurhash/murmurhash2.c
104
+ - ext/digest/murmurhash/murmurhash2.h
105
+ - ext/digest/murmurhash/murmurhash2a.c
106
+ - ext/digest/murmurhash/murmurhash2a.h
107
+ - lib/digest/murmurhash.rb
86
108
  - lib/digest/murmurhash/version.rb
87
109
  - spec/bench.rb
88
110
  - spec/digest_spec.rb
111
+ - spec/exception_spec.rb
89
112
  - spec/mem_spec.rb
90
113
  - spec/spec_helper.rb
91
114
  homepage: ''
@@ -108,13 +131,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
131
  version: '0'
109
132
  requirements: []
110
133
  rubyforge_project:
111
- rubygems_version: 2.1.6
134
+ rubygems_version: 2.1.11
112
135
  signing_key:
113
136
  specification_version: 4
114
- summary: Digest::MurmurHash is a class of use algorithm MurmurHash(MurmurHash1 32-bit)
115
- desiged by Austin Appleby.
137
+ summary: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
138
+ by Austin Appleby.
116
139
  test_files:
117
140
  - spec/bench.rb
118
141
  - spec/digest_spec.rb
142
+ - spec/exception_spec.rb
119
143
  - spec/mem_spec.rb
120
144
  - spec/spec_helper.rb
@@ -1,209 +0,0 @@
1
- #include "ruby.h"
2
- #ifdef HAVE_RUBY_DIGEST_H
3
- #include "ruby/digest.h"
4
- #else
5
- #include "digest.h"
6
- #endif
7
-
8
- #define MURMURHASH_DIGEST_LENGTH 4
9
- #define MURMURHASH_BLOCK_LENGTH 4
10
-
11
-
12
- ID id_concat;
13
-
14
- typedef struct {
15
- char* data;
16
- char* p;
17
- size_t memsize;
18
- } murmur_t;
19
-
20
- #define MURMURHASH(self, name) \
21
- murmur_t* name; \
22
- Data_Get_Struct(self, murmur_t, name); \
23
- if (name == NULL) { \
24
- rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
25
- }
26
-
27
- static void
28
- murmur_init(murmur_t* ptr)
29
- {
30
- ptr->data = (char*) malloc(sizeof(char) * 64);
31
- ptr->p = ptr->data;
32
- ptr->memsize = 64;
33
- }
34
-
35
- static void
36
- murmur_mark(murmur_t* ptr)
37
- {
38
- }
39
-
40
- static void
41
- murmur_free(murmur_t* ptr)
42
- {
43
- free(ptr->data);
44
- }
45
-
46
- static VALUE
47
- murmur_alloc(VALUE self)
48
- {
49
- murmur_t* ptr = ALLOC(murmur_t);
50
- murmur_init(ptr);
51
- return Data_Wrap_Struct(self, murmur_mark, murmur_free, ptr);
52
- }
53
-
54
- static VALUE
55
- murmur_initialize_copy(VALUE copy, VALUE origin)
56
- {
57
- murmur_t *ptr_copy, *ptr_origin;
58
- size_t data_len;
59
-
60
- if (copy == origin) return copy;
61
-
62
- rb_check_frozen(copy);
63
-
64
- Data_Get_Struct(copy, murmur_t, ptr_copy);
65
- Data_Get_Struct(origin, murmur_t, ptr_origin);
66
-
67
- data_len = ptr_origin->p - ptr_origin->data;
68
- ptr_copy->data = (char*) malloc(sizeof(char) * ptr_origin->memsize);
69
- memcpy(ptr_copy->data, ptr_origin->data, data_len);
70
- ptr_copy->p = ptr_copy->data + data_len;
71
- ptr_copy->memsize = ptr_origin->memsize;
72
-
73
- return copy;
74
- }
75
-
76
- static VALUE
77
- murmur_reset(VALUE self)
78
- {
79
- MURMURHASH(self, ptr);
80
- ptr->p = ptr->data;
81
- return self;
82
- }
83
-
84
- static VALUE
85
- murmur_update(VALUE self, VALUE str)
86
- {
87
- size_t data_len, str_len, require, newsize;
88
- const char* str_p;
89
- MURMURHASH(self, ptr);
90
-
91
- StringValue(str);
92
- str_p = RSTRING_PTR(str);
93
- str_len = RSTRING_LEN(str);
94
- data_len = (ptr->p - ptr->data);
95
- require = data_len + str_len;
96
- if (ptr->memsize < require) {
97
- newsize = ptr->memsize;
98
- while (newsize < require) {
99
- newsize *= 2;
100
- }
101
- ptr->data = realloc(ptr->data, sizeof(char) * newsize);
102
- ptr->p = ptr->data + data_len;
103
- ptr->memsize = newsize;
104
- }
105
- memcpy(ptr->p, str_p, str_len);
106
- ptr->p += str_len;
107
-
108
- return self;
109
- }
110
-
111
- static uint32_t
112
- murmur_hash_process(murmur_t* ptr)
113
- {
114
- const uint32_t m = 0x5bd1e995;
115
- const uint8_t r = 16;
116
- uint32_t length, h;
117
- const char* p;
118
-
119
- p = ptr->data;
120
- length = ptr->p - ptr->data;
121
- h = length * m;
122
-
123
- while (4 <= length) {
124
- h += *(uint32_t*)p;
125
- h *= m;
126
- h ^= h >> r;
127
- p += 4;
128
- length -= 4;
129
- }
130
-
131
- switch (length) {
132
- case 3:
133
- h += p[2] << 16;
134
- case 2:
135
- h += p[1] << 8;
136
- case 1:
137
- h += p[0];
138
- h *= m;
139
- h ^= h >> r;
140
- }
141
-
142
- h *= m;
143
- h ^= h >> 10;
144
- h *= m;
145
- h ^= h >> 17;
146
-
147
- return h;
148
- }
149
-
150
- static VALUE
151
- murmur_finish(VALUE self)
152
- {
153
- uint32_t h;
154
- uint8_t digest[MURMURHASH_DIGEST_LENGTH];
155
- MURMURHASH(self, ptr);
156
-
157
- h = murmur_hash_process(ptr);
158
-
159
- digest[0] = (h >> 24);
160
- digest[1] = (h >> 16);
161
- digest[2] = (h >> 8);
162
- digest[3] = (h);
163
-
164
- return rb_str_new((const char*) digest, 4);
165
- }
166
-
167
- static VALUE
168
- murmur_digest_length(VALUE self)
169
- {
170
- return INT2NUM(MURMURHASH_DIGEST_LENGTH);
171
- }
172
-
173
- static VALUE
174
- murmur_block_length(VALUE self)
175
- {
176
- return INT2NUM(MURMURHASH_BLOCK_LENGTH);
177
- }
178
-
179
- static VALUE
180
- murmur_to_i(VALUE self)
181
- {
182
- MURMURHASH(self, ptr);
183
- return UINT2NUM(murmur_hash_process(ptr));
184
- }
185
-
186
- void
187
- Init_murmurhash()
188
- {
189
- VALUE mDigest, cDigest_Base, cDigest_MurmurHash;
190
-
191
- id_concat = rb_intern("concat");
192
-
193
- rb_require("digest");
194
-
195
- mDigest = rb_path2class("Digest");
196
- cDigest_Base = rb_path2class("Digest::Base");
197
-
198
- cDigest_MurmurHash = rb_define_class_under(mDigest, "MurmurHash", cDigest_Base);
199
-
200
- rb_define_alloc_func(cDigest_MurmurHash, murmur_alloc);
201
- rb_define_method(cDigest_MurmurHash, "initialize_copy", murmur_initialize_copy, 1);
202
- rb_define_method(cDigest_MurmurHash, "reset", murmur_reset, 0);
203
- rb_define_method(cDigest_MurmurHash, "update", murmur_update, 1);
204
- rb_define_private_method(cDigest_MurmurHash, "finish", murmur_finish, 0);
205
- rb_define_method(cDigest_MurmurHash, "digest_length", murmur_digest_length, 0);
206
- rb_define_method(cDigest_MurmurHash, "block_length", murmur_block_length, 0);
207
-
208
- rb_define_method(cDigest_MurmurHash, "to_i", murmur_to_i, 0);
209
- }