digest-murmurhash 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -8
- data/Rakefile +1 -1
- data/digest-murmurhash.gemspec +6 -6
- data/ext/digest/murmurhash/extconf.rb +1 -1
- data/ext/digest/murmurhash/init.c +34 -0
- data/ext/digest/murmurhash/murmurhash.h +23 -0
- data/ext/digest/murmurhash/murmurhash1.c +86 -0
- data/ext/digest/murmurhash/murmurhash1.h +11 -0
- data/ext/digest/murmurhash/murmurhash2.c +90 -0
- data/ext/digest/murmurhash/murmurhash2.h +11 -0
- data/ext/digest/murmurhash/murmurhash2a.c +84 -0
- data/ext/digest/murmurhash/murmurhash2a.h +11 -0
- data/lib/digest/murmurhash.rb +6 -0
- data/lib/digest/murmurhash/version.rb +1 -1
- data/spec/bench.rb +83 -16
- data/spec/digest_spec.rb +57 -35
- data/spec/exception_spec.rb +20 -0
- data/spec/mem_spec.rb +12 -6
- data/spec/spec_helper.rb +1 -0
- metadata +34 -10
- data/ext/digest/murmurhash/murmurhash.c +0 -209
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 635a6c34802609c512789d68c9ebe571df48cc01
|
4
|
+
data.tar.gz: 1a1884fb59d6a9ccfca3c5da969e688589c093f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2633494b6b099a2149deeb3c39af1eb81e80435156bc459b4aee9498ca5209f41ac5a400713b6ae6c762935c5a1d964bc408e63a6e21e0634778d4a78c63f329
|
7
|
+
data.tar.gz: 9c61561046045532d0c8fe863059625cb532d051ea978f16fc63b0bd7fada9fded44900d91642695253d50fd94344c9444d35d5e6715b2956c2078713bcd9fd2
|
data/README.md
CHANGED
@@ -2,26 +2,39 @@
|
|
2
2
|
|
3
3
|
[Build Status](https://travis-ci.org/ksss/digest-murmurhash)
|
4
4
|
|
5
|
-
|
5
|
+
**MurmurHash** is an algorithm designed by Austin Appleby.
|
6
6
|
|
7
|
-
Digest::
|
7
|
+
**Digest::MurmurHash1** uses the MurmurHash1 (32-bit) algorithm.
|
8
8
|
|
9
|
-
|
9
|
+
**Digest::MurmurHash2** uses the MurmurHash2 (32-bit) algorithm.
|
10
|
+
|
11
|
+
(**Digest::MurmurHash** exists only to hold the version number.)
|
10
12
|
|
11
|
-
All
|
13
|
+
All classes comply with Ruby's Digest API.
|
14
|
+
|
15
|
+
## Usage
|
12
16
|
|
13
|
-
You can use same interface built
|
17
|
+
You can use the same interface as the built-in Digest::XXX classes.
|
14
18
|
|
15
19
|
```ruby
|
16
20
|
require 'digest/murmurhash'
|
17
21
|
|
18
|
-
|
19
|
-
|
22
|
+
# MurmurHash1 can be used just like Digest::XXX.
|
23
|
+
|
24
|
+
p Digest::MurmurHash1.hexdigest('murmurhash') #=> 'c709abd5'
|
25
|
+
p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> '712e9641'
|
26
|
+
|
27
|
+
# MurmurHash2 works the same way, but returns a different value because it uses a different algorithm.
|
28
|
+
|
29
|
+
p Digest::MurmurHash2.hexdigest('murmurhash') #=> '33f67c7e'
|
30
|
+
p Digest::MurmurHash2.file("./LICENSE.txt").hexdigest #=> '78678326'
|
20
31
|
```
|
21
32
|
|
22
33
|
## Class tree
|
23
34
|
|
24
|
-
Digest::
|
35
|
+
**Digest::MurmurHash1** < Digest::StringBuffer
|
36
|
+
|
37
|
+
**Digest::MurmurHash2** < Digest::StringBuffer
|
25
38
|
|
26
39
|
## Installation
|
27
40
|
|
data/Rakefile
CHANGED
data/digest-murmurhash.gemspec
CHANGED
@@ -1,16 +1,15 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
lib = File.expand_path('../lib', __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
|
4
|
+
require 'digest/murmurhash/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "digest-murmurhash"
|
8
|
-
|
9
|
-
spec.version = "0.1.0"
|
8
|
+
spec.version = Digest::MurmurHash::VERSION
|
10
9
|
spec.author = "ksss"
|
11
10
|
spec.email = "co000ri@gmail.com"
|
12
|
-
spec.description = %q{Digest::
|
13
|
-
spec.summary = %q{Digest::
|
11
|
+
spec.description = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
|
12
|
+
spec.summary = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
|
14
13
|
spec.homepage = ""
|
15
14
|
spec.license = "MIT"
|
16
15
|
|
@@ -23,5 +22,6 @@ Gem::Specification.new do |spec|
|
|
23
22
|
spec.add_development_dependency "bundler", "~> 1.3"
|
24
23
|
spec.add_development_dependency "rake"
|
25
24
|
spec.add_development_dependency "rspec", ['~> 2.11']
|
26
|
-
spec.add_development_dependency "rake-compiler", ["~> 0.
|
25
|
+
spec.add_development_dependency "rake-compiler", ["~> 0.9.2"]
|
26
|
+
spec.add_development_dependency "digest-stringbuffer", ["~> 0.0.2"]
|
27
27
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#include "murmurhash.h"
|
2
|
+
#include "murmurhash1.h"
|
3
|
+
#include "murmurhash2.h"
|
4
|
+
#include "murmurhash2a.h"
|
5
|
+
|
6
|
+
void
|
7
|
+
Init_murmurhash(void)
|
8
|
+
{
|
9
|
+
VALUE mDigest, cDigest_StringBuffer;
|
10
|
+
VALUE cDigest_MurmurHash1, cDigest_MurmurHash2, cDigest_MurmurHash2A;
|
11
|
+
|
12
|
+
/* Digest::MurmurHash requires Digest::StringBuffer */
|
13
|
+
rb_require("digest/stringbuffer");
|
14
|
+
mDigest = rb_path2class("Digest");
|
15
|
+
cDigest_StringBuffer = rb_path2class("Digest::StringBuffer");
|
16
|
+
|
17
|
+
/* class Digest::MurmurHash1 < Digest::StringBuffer */
|
18
|
+
cDigest_MurmurHash1 = rb_define_class_under(mDigest, "MurmurHash1", cDigest_StringBuffer);
|
19
|
+
rb_define_private_method(cDigest_MurmurHash1, "finish", murmur1_finish, 0);
|
20
|
+
rb_define_method(cDigest_MurmurHash1, "to_i", murmur1_to_i, 0);
|
21
|
+
rb_define_singleton_method(cDigest_MurmurHash1, "rawdigest", murmur1_s_rawdigest, -1);
|
22
|
+
|
23
|
+
/* class Digest::MurmurHash2 < Digest::StringBuffer */
|
24
|
+
cDigest_MurmurHash2 = rb_define_class_under(mDigest, "MurmurHash2", cDigest_StringBuffer);
|
25
|
+
rb_define_private_method(cDigest_MurmurHash2, "finish", murmur2_finish, 0);
|
26
|
+
rb_define_method(cDigest_MurmurHash2, "to_i", murmur2_to_i, 0);
|
27
|
+
rb_define_singleton_method(cDigest_MurmurHash2, "rawdigest", murmur2_s_rawdigest, -1);
|
28
|
+
|
29
|
+
/* class Digest::MurmurHash2A < Digest::StringBuffer */
|
30
|
+
cDigest_MurmurHash2A = rb_define_class_under(mDigest, "MurmurHash2A", cDigest_StringBuffer);
|
31
|
+
rb_define_private_method(cDigest_MurmurHash2A, "finish", murmur2a_finish, 0);
|
32
|
+
rb_define_method(cDigest_MurmurHash2A, "to_i", murmur2a_to_i, 0);
|
33
|
+
rb_define_singleton_method(cDigest_MurmurHash2A, "rawdigest", murmur2a_s_rawdigest, -1);
|
34
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#ifndef MURMURHASH_INCLUDED
|
2
|
+
# define MURMURHASH_INCLUDED
|
3
|
+
|
4
|
+
#include "ruby.h"
|
5
|
+
|
6
|
+
#define MURMURHASH_MAGIC 0x5bd1e995
|
7
|
+
|
8
|
+
/* should have the same structure as the buffer type in digest/stringbuffer */
|
9
|
+
typedef struct {
|
10
|
+
char* buffer;
|
11
|
+
char* p;
|
12
|
+
size_t memsize;
|
13
|
+
} buffer_t;
|
14
|
+
|
15
|
+
#define MURMURHASH(self, name) \
|
16
|
+
buffer_t* name; \
|
17
|
+
Data_Get_Struct(self, buffer_t, name); \
|
18
|
+
if (name == NULL) { \
|
19
|
+
rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
|
20
|
+
}
|
21
|
+
|
22
|
+
#endif /* ifndef MURMURHASH_INCLUDED */
|
23
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHash1 (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash1.h"
|
6
|
+
|
7
|
+
static inline size_t
|
8
|
+
murmur1(uint32_t h, const uint8_t r)
|
9
|
+
{
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
h *= m;
|
12
|
+
h ^= h >> r;
|
13
|
+
return h;
|
14
|
+
}
|
15
|
+
|
16
|
+
static uint32_t
|
17
|
+
murmur_hash_process1(const char *data, uint32_t length)
|
18
|
+
{
|
19
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
20
|
+
const uint8_t r = 16;
|
21
|
+
uint32_t h;
|
22
|
+
|
23
|
+
h = length * m;
|
24
|
+
|
25
|
+
while (4 <= length) {
|
26
|
+
h += *(uint32_t*)data;
|
27
|
+
h = murmur1(h, r);
|
28
|
+
data += 4;
|
29
|
+
length -= 4;
|
30
|
+
}
|
31
|
+
|
32
|
+
switch (length) {
|
33
|
+
case 3:
|
34
|
+
h += data[2] << 16;
|
35
|
+
case 2:
|
36
|
+
h += data[1] << 8;
|
37
|
+
case 1:
|
38
|
+
h += data[0];
|
39
|
+
h = murmur1(h, r);
|
40
|
+
}
|
41
|
+
|
42
|
+
h = murmur1(h, 10);
|
43
|
+
h = murmur1(h, 17);
|
44
|
+
|
45
|
+
return h;
|
46
|
+
}
|
47
|
+
|
48
|
+
VALUE
|
49
|
+
murmur1_finish(VALUE self)
|
50
|
+
{
|
51
|
+
uint32_t h;
|
52
|
+
uint8_t digest[4];
|
53
|
+
MURMURHASH(self, ptr);
|
54
|
+
|
55
|
+
h = murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer);
|
56
|
+
|
57
|
+
digest[0] = h >> 24;
|
58
|
+
digest[1] = h >> 16;
|
59
|
+
digest[2] = h >> 8;
|
60
|
+
digest[3] = h;
|
61
|
+
|
62
|
+
return rb_str_new((const char*) digest, 4);
|
63
|
+
}
|
64
|
+
|
65
|
+
VALUE
|
66
|
+
murmur1_to_i(VALUE self)
|
67
|
+
{
|
68
|
+
MURMURHASH(self, ptr);
|
69
|
+
return UINT2NUM(murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer));
|
70
|
+
}
|
71
|
+
|
72
|
+
VALUE
|
73
|
+
murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
74
|
+
{
|
75
|
+
VALUE str;
|
76
|
+
|
77
|
+
if (argc < 1)
|
78
|
+
rb_raise(rb_eArgError, "no data given");
|
79
|
+
|
80
|
+
str = *argv++;
|
81
|
+
argc--;
|
82
|
+
|
83
|
+
StringValue(str);
|
84
|
+
|
85
|
+
return UINT2NUM(murmur_hash_process1(RSTRING_PTR(str), RSTRING_LEN(str)));
|
86
|
+
}
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef MURMURHASH1_INCLUDED
|
2
|
+
# define MURMURHASH1_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur1_finish(VALUE self);
|
7
|
+
VALUE murmur1_to_i(VALUE self);
|
8
|
+
VALUE murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
|
10
|
+
#endif /* ifndef MURMURHASH1_INCLUDED */
|
11
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHash2 (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash2.h"
|
6
|
+
|
7
|
+
static inline size_t
|
8
|
+
murmur2(uint32_t h, uint32_t k, const uint8_t r)
|
9
|
+
{
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
k *= m;
|
12
|
+
k ^= k >> r;
|
13
|
+
k *= m;
|
14
|
+
|
15
|
+
h *= m;
|
16
|
+
h ^= k;
|
17
|
+
return h;
|
18
|
+
}
|
19
|
+
|
20
|
+
static uint32_t
|
21
|
+
murmur_hash_process2(const char *data, uint32_t length)
|
22
|
+
{
|
23
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
24
|
+
const uint8_t r = 24;
|
25
|
+
uint32_t h, k;
|
26
|
+
|
27
|
+
h = length * m;
|
28
|
+
|
29
|
+
while (4 <= length) {
|
30
|
+
k = *(uint32_t*)data;
|
31
|
+
h = murmur2(h, k, r);
|
32
|
+
data += 4;
|
33
|
+
length -= 4;
|
34
|
+
}
|
35
|
+
|
36
|
+
switch (length) {
|
37
|
+
case 3: h ^= data[2] << 16;
|
38
|
+
case 2: h ^= data[1] << 8;
|
39
|
+
case 1: h ^= data[0];
|
40
|
+
h *= m;
|
41
|
+
}
|
42
|
+
|
43
|
+
h ^= h >> 13;
|
44
|
+
h *= m;
|
45
|
+
h ^= h >> 15;
|
46
|
+
|
47
|
+
return h;
|
48
|
+
}
|
49
|
+
|
50
|
+
VALUE
|
51
|
+
murmur2_finish(VALUE self)
|
52
|
+
{
|
53
|
+
uint32_t h;
|
54
|
+
uint8_t digest[4];
|
55
|
+
MURMURHASH(self, ptr);
|
56
|
+
|
57
|
+
h = murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer);
|
58
|
+
|
59
|
+
digest[0] = h >> 24;
|
60
|
+
digest[1] = h >> 16;
|
61
|
+
digest[2] = h >> 8;
|
62
|
+
digest[3] = h;
|
63
|
+
|
64
|
+
return rb_str_new((const char*) digest, 4);
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE
|
68
|
+
murmur2_to_i(VALUE self)
|
69
|
+
{
|
70
|
+
MURMURHASH(self, ptr);
|
71
|
+
return UINT2NUM(murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer));
|
72
|
+
}
|
73
|
+
|
74
|
+
VALUE
|
75
|
+
murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
76
|
+
{
|
77
|
+
VALUE str;
|
78
|
+
volatile VALUE obj;
|
79
|
+
|
80
|
+
if (argc < 1)
|
81
|
+
rb_raise(rb_eArgError, "no data given");
|
82
|
+
|
83
|
+
str = *argv++;
|
84
|
+
argc--;
|
85
|
+
|
86
|
+
StringValue(str);
|
87
|
+
|
88
|
+
return UINT2NUM(murmur_hash_process2(RSTRING_PTR(str), RSTRING_LEN(str)));
|
89
|
+
}
|
90
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef MURMURHASH2_INCLUDED
|
2
|
+
# define MURMURHASH2_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur2_finish(VALUE self);
|
7
|
+
VALUE murmur2_to_i(VALUE self);
|
8
|
+
VALUE murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
|
10
|
+
#endif /* ifndef MURMURHASH2_INCLUDED */
|
11
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHash2A (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash2a.h"
|
6
|
+
|
7
|
+
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
|
8
|
+
|
9
|
+
static uint32_t
|
10
|
+
murmur_hash_process2a(const void *key, uint32_t length)
|
11
|
+
{
|
12
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
13
|
+
const uint8_t r = 24;
|
14
|
+
uint32_t h, k, t, l;
|
15
|
+
const unsigned char *data = (const unsigned char *) key;
|
16
|
+
|
17
|
+
l = length;
|
18
|
+
h = 0 ^ length;
|
19
|
+
|
20
|
+
while (4 <= length) {
|
21
|
+
k = *(uint32_t*)data;
|
22
|
+
mmix(h,k);
|
23
|
+
data += 4;
|
24
|
+
length -= 4;
|
25
|
+
}
|
26
|
+
|
27
|
+
t = 0;
|
28
|
+
switch (length) {
|
29
|
+
case 3: t ^= data[2] << 16;
|
30
|
+
case 2: t ^= data[1] << 8;
|
31
|
+
case 1: t ^= data[0];
|
32
|
+
}
|
33
|
+
|
34
|
+
mmix(h,t);
|
35
|
+
mmix(h,l);
|
36
|
+
|
37
|
+
h ^= h >> 13;
|
38
|
+
h *= m;
|
39
|
+
h ^= h >> 15;
|
40
|
+
|
41
|
+
return h;
|
42
|
+
}
|
43
|
+
|
44
|
+
VALUE
|
45
|
+
murmur2a_finish(VALUE self)
|
46
|
+
{
|
47
|
+
uint32_t h;
|
48
|
+
uint8_t digest[4];
|
49
|
+
MURMURHASH(self, ptr);
|
50
|
+
|
51
|
+
h = murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer);
|
52
|
+
|
53
|
+
digest[0] = h >> 24;
|
54
|
+
digest[1] = h >> 16;
|
55
|
+
digest[2] = h >> 8;
|
56
|
+
digest[3] = h;
|
57
|
+
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
59
|
+
}
|
60
|
+
|
61
|
+
VALUE
|
62
|
+
murmur2a_to_i(VALUE self)
|
63
|
+
{
|
64
|
+
MURMURHASH(self, ptr);
|
65
|
+
return UINT2NUM(murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer));
|
66
|
+
}
|
67
|
+
|
68
|
+
VALUE
|
69
|
+
murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
70
|
+
{
|
71
|
+
VALUE str;
|
72
|
+
volatile VALUE obj;
|
73
|
+
|
74
|
+
if (argc < 1)
|
75
|
+
rb_raise(rb_eArgError, "no data given");
|
76
|
+
|
77
|
+
str = *argv++;
|
78
|
+
argc--;
|
79
|
+
|
80
|
+
StringValue(str);
|
81
|
+
|
82
|
+
return UINT2NUM(murmur_hash_process2a(RSTRING_PTR(str), RSTRING_LEN(str)));
|
83
|
+
}
|
84
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef MURMURHASH2A_INCLUDED
|
2
|
+
# define MURMURHASH2A_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur2a_finish(VALUE self);
|
7
|
+
VALUE murmur2a_to_i(VALUE self);
|
8
|
+
VALUE murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
|
10
|
+
#endif /* ifndef MURMURHASH2A_INCLUDED */
|
11
|
+
|
data/spec/bench.rb
CHANGED
@@ -3,9 +3,24 @@
|
|
3
3
|
lib = File.expand_path('../../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
+
require 'digest/stringbuffer'
|
6
7
|
require 'digest/murmurhash'
|
7
8
|
require 'benchmark'
|
8
9
|
|
10
|
+
class Prime37 < Digest::StringBuffer
|
11
|
+
def initialize
|
12
|
+
@prime = 37
|
13
|
+
end
|
14
|
+
|
15
|
+
def finish
|
16
|
+
result = 0
|
17
|
+
buffer.unpack("C*").each do |c|
|
18
|
+
result += (c * @prime)
|
19
|
+
end
|
20
|
+
[result & 0xffffffff].pack("N")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
9
24
|
class Integer
|
10
25
|
def to_32
|
11
26
|
self & 0xffffffff
|
@@ -44,30 +59,82 @@ def murmur_hash str
|
|
44
59
|
h = (h * m).to_32
|
45
60
|
h ^= h >> 17
|
46
61
|
|
47
|
-
h
|
62
|
+
h
|
48
63
|
end
|
49
64
|
|
65
|
+
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
50
66
|
def rand_str
|
51
67
|
rand = "";
|
52
|
-
|
68
|
+
20.times {
|
69
|
+
rand << @rands[rand(62)]
|
70
|
+
}
|
53
71
|
rand
|
54
72
|
end
|
55
73
|
|
56
|
-
|
57
|
-
|
58
|
-
|
74
|
+
n = 100000
|
75
|
+
times_enum = n.times
|
76
|
+
|
77
|
+
a = Array.new(n, 0)
|
78
|
+
n.times do |i|
|
79
|
+
a[i] = rand_str
|
80
|
+
end
|
81
|
+
|
82
|
+
c = Struct.new "Cases",
|
83
|
+
:name,
|
84
|
+
:func
|
85
|
+
cases = [
|
86
|
+
c.new("pureRuby", proc{|x| murmur_hash x }),
|
87
|
+
c.new("Prime37", proc{|x| Prime37.digest x }),
|
88
|
+
c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x }),
|
89
|
+
c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x }),
|
90
|
+
c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x }),
|
91
|
+
]
|
92
|
+
|
93
|
+
reals = {}
|
94
|
+
confrict = {}
|
95
|
+
confricts = {}
|
59
96
|
|
97
|
+
puts "### condition"
|
98
|
+
puts
|
99
|
+
puts " RUBY_VERSION = #{RUBY_VERSION}"
|
100
|
+
puts " count = #{n}"
|
101
|
+
puts
|
102
|
+
puts "### benchmark"
|
103
|
+
puts
|
104
|
+
puts "```"
|
60
105
|
Benchmark.bm do |x|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
106
|
+
cases.each do |c|
|
107
|
+
z = x.report c.name do
|
108
|
+
times_enum.each do |i|
|
109
|
+
c.func.call(a[i])
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
confrict.clear
|
114
|
+
times_enum.each do |i|
|
115
|
+
rethash = c.func.call(a[i])
|
116
|
+
if confrict[rethash].nil?
|
117
|
+
confrict[rethash] = 0
|
118
|
+
else
|
119
|
+
confrict[rethash] += 1
|
120
|
+
end
|
121
|
+
end
|
122
|
+
reals[c.name] = z.real
|
123
|
+
confricts[c.name] = confrict.count{|hash, count| 0 < count}
|
124
|
+
end
|
125
|
+
end
|
126
|
+
puts "```"
|
127
|
+
|
128
|
+
puts
|
129
|
+
puts "### real second rate (pureRuby/)"
|
130
|
+
puts
|
131
|
+
reals.each do |name, real|
|
132
|
+
puts " " + (reals["pureRuby"] / real).to_s + "/" + name
|
133
|
+
end
|
66
134
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
}}
|
135
|
+
puts
|
136
|
+
puts "### confrict count (/#{n})"
|
137
|
+
puts
|
138
|
+
confricts.each do |name, count|
|
139
|
+
puts " #{name}: #{count}"
|
73
140
|
end
|
data/spec/digest_spec.rb
CHANGED
@@ -1,60 +1,82 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Digest::
|
4
|
-
let :
|
5
|
-
|
3
|
+
describe "Digest::MurmurHash1 and 2" do
|
4
|
+
let :all do
|
5
|
+
[MurmurHash1, MurmurHash2, MurmurHash2A]
|
6
6
|
end
|
7
7
|
|
8
8
|
it "initialize" do
|
9
|
-
expect(
|
9
|
+
expect(MurmurHash1.new).to be_a_kind_of(Digest::StringBuffer)
|
10
|
+
expect(MurmurHash2.new).to be_a_kind_of(Digest::StringBuffer)
|
11
|
+
expect(MurmurHash2A.new).to be_a_kind_of(Digest::StringBuffer)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "digest" do
|
15
|
+
expect(MurmurHash1.digest("a" * 1024)).to eq("\xa1\x52\x2e\x5f".force_encoding("ASCII-8BIT"))
|
16
|
+
expect(MurmurHash2.digest("a" * 1024)).to eq("\xd0\x0c\x31\x2f".force_encoding("ASCII-8BIT"))
|
17
|
+
expect(MurmurHash2A.digest("a" * 1024)).to eq("\xd5\x2d\xb1\x67".force_encoding("ASCII-8BIT"))
|
10
18
|
end
|
11
19
|
|
12
20
|
it "hexdigest" do
|
13
|
-
expect(
|
21
|
+
expect(MurmurHash1.hexdigest("a" * 1024)).to eq("a1522e5f")
|
22
|
+
expect(MurmurHash2.hexdigest("a" * 1024)).to eq("d00c312f")
|
23
|
+
expect(MurmurHash2A.hexdigest("a" * 1024)).to eq("d52db167")
|
14
24
|
end
|
15
25
|
|
16
|
-
it "
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
murmur.update("m")
|
21
|
-
murmur.update("u")
|
22
|
-
murmur.update("r")
|
23
|
-
murmur.update("hash")
|
24
|
-
expect(murmur.hexdigest).to eq("c709abd5");
|
25
|
-
expect(murmur.hexdigest).to eq("c709abd5");
|
26
|
-
expect(murmur.hexdigest!).to eq("c709abd5");
|
27
|
-
expect(murmur.hexdigest).to eq("00000000");
|
26
|
+
it "rawdigest" do
|
27
|
+
expect(MurmurHash1.rawdigest("a" * 1024)).to eq(0xa1522e5f)
|
28
|
+
expect(MurmurHash2.rawdigest("a" * 1024)).to eq(0xd00c312f)
|
29
|
+
expect(MurmurHash2A.rawdigest("a" * 1024)).to eq(0xd52db167)
|
28
30
|
end
|
29
31
|
|
30
|
-
it "
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
it "update and reset and hexdigest" do
|
33
|
+
{
|
34
|
+
MurmurHash1 => "c709abd5",
|
35
|
+
MurmurHash2 => "33f67c7e",
|
36
|
+
MurmurHash2A => "df25554b",
|
37
|
+
}.each do |c, should|
|
38
|
+
murmur = c.new
|
39
|
+
murmur.update("m").update("u").update("r")
|
40
|
+
murmur << "m" << "u" << "r"
|
41
|
+
murmur << "hash"
|
42
|
+
expect(murmur.hexdigest).to eq(should);
|
43
|
+
expect(murmur.hexdigest).to eq(should);
|
44
|
+
expect(murmur.hexdigest!).to eq(should);
|
45
|
+
expect(murmur.hexdigest).to eq("00000000");
|
46
|
+
end
|
37
47
|
end
|
38
48
|
|
39
49
|
it "==" do
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
50
|
+
all.each do |c|
|
51
|
+
["", "murmur", "murmurhash" * 1024].each do |str|
|
52
|
+
murmur1 = c.new
|
53
|
+
murmur2 = c.new
|
54
|
+
expect(murmur1.update(str) == murmur2.update(str)).to be_true
|
55
|
+
end
|
44
56
|
end
|
45
57
|
end
|
46
58
|
|
47
|
-
it "
|
48
|
-
|
59
|
+
it "dup" do
|
60
|
+
all.each do |c|
|
61
|
+
murmur1 = c.new
|
62
|
+
murmur2 = c.new
|
63
|
+
10.times {
|
64
|
+
murmur1 = murmur1.update("murmurhash" * 100).dup
|
65
|
+
}
|
66
|
+
murmur2.update(("murmurhash" * 100) * 10)
|
67
|
+
expect(murmur1 == murmur2).to be_true
|
68
|
+
end
|
49
69
|
end
|
50
70
|
|
51
|
-
it "
|
52
|
-
|
53
|
-
|
54
|
-
expect(
|
71
|
+
it "length" do
|
72
|
+
expect(MurmurHash1.new.length).to eq(4);
|
73
|
+
expect(MurmurHash2.new.length).to eq(4);
|
74
|
+
expect(MurmurHash2A.new.length).to eq(4);
|
55
75
|
end
|
56
76
|
|
57
77
|
it "to_i" do
|
58
|
-
expect(
|
78
|
+
expect(MurmurHash1.new.update("murmurhash").to_i).to eq(0xc709abd5);
|
79
|
+
expect(MurmurHash2.new.update("murmurhash").to_i).to eq(0x33f67c7e);
|
80
|
+
expect(MurmurHash2A.new.update("murmurhash").to_i).to eq(0xdf25554b);
|
59
81
|
end
|
60
82
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MurmurHash do
|
4
|
+
let :all do
|
5
|
+
[MurmurHash1, MurmurHash2, MurmurHash2A]
|
6
|
+
end
|
7
|
+
|
8
|
+
it "update nil" do
|
9
|
+
all.each do |c|
|
10
|
+
murmur = c.new
|
11
|
+
expect{ murmur.update }.to raise_error(ArgumentError)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
it "rawdigest no arguments" do
|
16
|
+
all.each do |c|
|
17
|
+
expect{ c.rawdigest }.to raise_error(ArgumentError)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/spec/mem_spec.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe MurmurHash do
|
4
4
|
it "gc safe" do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
{
|
6
|
+
MurmurHash1 => "c709abd5",
|
7
|
+
MurmurHash2 => "33f67c7e",
|
8
|
+
MurmurHash2A => "df25554b",
|
9
|
+
}.each do |c, should|
|
10
|
+
murmur = c.new
|
11
|
+
GC.start
|
12
|
+
murmur.update("murmur")
|
13
|
+
GC.start
|
14
|
+
expect(murmur.update("hash").to_s).to eq(should);
|
15
|
+
end
|
10
16
|
end
|
11
17
|
end
|
12
18
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-murmurhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ksss
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,16 +58,30 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
61
|
+
version: 0.9.2
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
69
|
-
|
70
|
-
|
68
|
+
version: 0.9.2
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: digest-stringbuffer
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.0.2
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.0.2
|
83
|
+
description: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
|
84
|
+
by Austin Appleby.
|
71
85
|
email: co000ri@gmail.com
|
72
86
|
executables: []
|
73
87
|
extensions:
|
@@ -82,10 +96,19 @@ files:
|
|
82
96
|
- Rakefile
|
83
97
|
- digest-murmurhash.gemspec
|
84
98
|
- ext/digest/murmurhash/extconf.rb
|
85
|
-
- ext/digest/murmurhash/
|
99
|
+
- ext/digest/murmurhash/init.c
|
100
|
+
- ext/digest/murmurhash/murmurhash.h
|
101
|
+
- ext/digest/murmurhash/murmurhash1.c
|
102
|
+
- ext/digest/murmurhash/murmurhash1.h
|
103
|
+
- ext/digest/murmurhash/murmurhash2.c
|
104
|
+
- ext/digest/murmurhash/murmurhash2.h
|
105
|
+
- ext/digest/murmurhash/murmurhash2a.c
|
106
|
+
- ext/digest/murmurhash/murmurhash2a.h
|
107
|
+
- lib/digest/murmurhash.rb
|
86
108
|
- lib/digest/murmurhash/version.rb
|
87
109
|
- spec/bench.rb
|
88
110
|
- spec/digest_spec.rb
|
111
|
+
- spec/exception_spec.rb
|
89
112
|
- spec/mem_spec.rb
|
90
113
|
- spec/spec_helper.rb
|
91
114
|
homepage: ''
|
@@ -108,13 +131,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
131
|
version: '0'
|
109
132
|
requirements: []
|
110
133
|
rubyforge_project:
|
111
|
-
rubygems_version: 2.1.
|
134
|
+
rubygems_version: 2.1.11
|
112
135
|
signing_key:
|
113
136
|
specification_version: 4
|
114
|
-
summary: Digest::
|
115
|
-
|
137
|
+
summary: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
|
138
|
+
by Austin Appleby.
|
116
139
|
test_files:
|
117
140
|
- spec/bench.rb
|
118
141
|
- spec/digest_spec.rb
|
142
|
+
- spec/exception_spec.rb
|
119
143
|
- spec/mem_spec.rb
|
120
144
|
- spec/spec_helper.rb
|
@@ -1,209 +0,0 @@
|
|
1
|
-
#include "ruby.h"
|
2
|
-
#ifdef HAVE_RUBY_DIGEST_H
|
3
|
-
#include "ruby/digest.h"
|
4
|
-
#else
|
5
|
-
#include "digest.h"
|
6
|
-
#endif
|
7
|
-
|
8
|
-
#define MURMURHASH_DIGEST_LENGTH 4
|
9
|
-
#define MURMURHASH_BLOCK_LENGTH 4
|
10
|
-
|
11
|
-
|
12
|
-
ID id_concat;
|
13
|
-
|
14
|
-
typedef struct {
|
15
|
-
char* data;
|
16
|
-
char* p;
|
17
|
-
size_t memsize;
|
18
|
-
} murmur_t;
|
19
|
-
|
20
|
-
#define MURMURHASH(self, name) \
|
21
|
-
murmur_t* name; \
|
22
|
-
Data_Get_Struct(self, murmur_t, name); \
|
23
|
-
if (name == NULL) { \
|
24
|
-
rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
|
25
|
-
}
|
26
|
-
|
27
|
-
static void
|
28
|
-
murmur_init(murmur_t* ptr)
|
29
|
-
{
|
30
|
-
ptr->data = (char*) malloc(sizeof(char) * 64);
|
31
|
-
ptr->p = ptr->data;
|
32
|
-
ptr->memsize = 64;
|
33
|
-
}
|
34
|
-
|
35
|
-
static void
|
36
|
-
murmur_mark(murmur_t* ptr)
|
37
|
-
{
|
38
|
-
}
|
39
|
-
|
40
|
-
static void
|
41
|
-
murmur_free(murmur_t* ptr)
|
42
|
-
{
|
43
|
-
free(ptr->data);
|
44
|
-
}
|
45
|
-
|
46
|
-
static VALUE
|
47
|
-
murmur_alloc(VALUE self)
|
48
|
-
{
|
49
|
-
murmur_t* ptr = ALLOC(murmur_t);
|
50
|
-
murmur_init(ptr);
|
51
|
-
return Data_Wrap_Struct(self, murmur_mark, murmur_free, ptr);
|
52
|
-
}
|
53
|
-
|
54
|
-
static VALUE
|
55
|
-
murmur_initialize_copy(VALUE copy, VALUE origin)
|
56
|
-
{
|
57
|
-
murmur_t *ptr_copy, *ptr_origin;
|
58
|
-
size_t data_len;
|
59
|
-
|
60
|
-
if (copy == origin) return copy;
|
61
|
-
|
62
|
-
rb_check_frozen(copy);
|
63
|
-
|
64
|
-
Data_Get_Struct(copy, murmur_t, ptr_copy);
|
65
|
-
Data_Get_Struct(origin, murmur_t, ptr_origin);
|
66
|
-
|
67
|
-
data_len = ptr_origin->p - ptr_origin->data;
|
68
|
-
ptr_copy->data = (char*) malloc(sizeof(char) * ptr_origin->memsize);
|
69
|
-
memcpy(ptr_copy->data, ptr_origin->data, data_len);
|
70
|
-
ptr_copy->p = ptr_copy->data + data_len;
|
71
|
-
ptr_copy->memsize = ptr_origin->memsize;
|
72
|
-
|
73
|
-
return copy;
|
74
|
-
}
|
75
|
-
|
76
|
-
static VALUE
|
77
|
-
murmur_reset(VALUE self)
|
78
|
-
{
|
79
|
-
MURMURHASH(self, ptr);
|
80
|
-
ptr->p = ptr->data;
|
81
|
-
return self;
|
82
|
-
}
|
83
|
-
|
84
|
-
static VALUE
|
85
|
-
murmur_update(VALUE self, VALUE str)
|
86
|
-
{
|
87
|
-
size_t data_len, str_len, require, newsize;
|
88
|
-
const char* str_p;
|
89
|
-
MURMURHASH(self, ptr);
|
90
|
-
|
91
|
-
StringValue(str);
|
92
|
-
str_p = RSTRING_PTR(str);
|
93
|
-
str_len = RSTRING_LEN(str);
|
94
|
-
data_len = (ptr->p - ptr->data);
|
95
|
-
require = data_len + str_len;
|
96
|
-
if (ptr->memsize < require) {
|
97
|
-
newsize = ptr->memsize;
|
98
|
-
while (newsize < require) {
|
99
|
-
newsize *= 2;
|
100
|
-
}
|
101
|
-
ptr->data = realloc(ptr->data, sizeof(char) * newsize);
|
102
|
-
ptr->p = ptr->data + data_len;
|
103
|
-
ptr->memsize = newsize;
|
104
|
-
}
|
105
|
-
memcpy(ptr->p, str_p, str_len);
|
106
|
-
ptr->p += str_len;
|
107
|
-
|
108
|
-
return self;
|
109
|
-
}
|
110
|
-
|
111
|
-
static uint32_t
|
112
|
-
murmur_hash_process(murmur_t* ptr)
|
113
|
-
{
|
114
|
-
const uint32_t m = 0x5bd1e995;
|
115
|
-
const uint8_t r = 16;
|
116
|
-
uint32_t length, h;
|
117
|
-
const char* p;
|
118
|
-
|
119
|
-
p = ptr->data;
|
120
|
-
length = ptr->p - ptr->data;
|
121
|
-
h = length * m;
|
122
|
-
|
123
|
-
while (4 <= length) {
|
124
|
-
h += *(uint32_t*)p;
|
125
|
-
h *= m;
|
126
|
-
h ^= h >> r;
|
127
|
-
p += 4;
|
128
|
-
length -= 4;
|
129
|
-
}
|
130
|
-
|
131
|
-
switch (length) {
|
132
|
-
case 3:
|
133
|
-
h += p[2] << 16;
|
134
|
-
case 2:
|
135
|
-
h += p[1] << 8;
|
136
|
-
case 1:
|
137
|
-
h += p[0];
|
138
|
-
h *= m;
|
139
|
-
h ^= h >> r;
|
140
|
-
}
|
141
|
-
|
142
|
-
h *= m;
|
143
|
-
h ^= h >> 10;
|
144
|
-
h *= m;
|
145
|
-
h ^= h >> 17;
|
146
|
-
|
147
|
-
return h;
|
148
|
-
}
|
149
|
-
|
150
|
-
static VALUE
|
151
|
-
murmur_finish(VALUE self)
|
152
|
-
{
|
153
|
-
uint32_t h;
|
154
|
-
uint8_t digest[MURMURHASH_DIGEST_LENGTH];
|
155
|
-
MURMURHASH(self, ptr);
|
156
|
-
|
157
|
-
h = murmur_hash_process(ptr);
|
158
|
-
|
159
|
-
digest[0] = (h >> 24);
|
160
|
-
digest[1] = (h >> 16);
|
161
|
-
digest[2] = (h >> 8);
|
162
|
-
digest[3] = (h);
|
163
|
-
|
164
|
-
return rb_str_new((const char*) digest, 4);
|
165
|
-
}
|
166
|
-
|
167
|
-
static VALUE
|
168
|
-
murmur_digest_length(VALUE self)
|
169
|
-
{
|
170
|
-
return INT2NUM(MURMURHASH_DIGEST_LENGTH);
|
171
|
-
}
|
172
|
-
|
173
|
-
static VALUE
|
174
|
-
murmur_block_length(VALUE self)
|
175
|
-
{
|
176
|
-
return INT2NUM(MURMURHASH_BLOCK_LENGTH);
|
177
|
-
}
|
178
|
-
|
179
|
-
static VALUE
|
180
|
-
murmur_to_i(VALUE self)
|
181
|
-
{
|
182
|
-
MURMURHASH(self, ptr);
|
183
|
-
return UINT2NUM(murmur_hash_process(ptr));
|
184
|
-
}
|
185
|
-
|
186
|
-
void
|
187
|
-
Init_murmurhash()
|
188
|
-
{
|
189
|
-
VALUE mDigest, cDigest_Base, cDigest_MurmurHash;
|
190
|
-
|
191
|
-
id_concat = rb_intern("concat");
|
192
|
-
|
193
|
-
rb_require("digest");
|
194
|
-
|
195
|
-
mDigest = rb_path2class("Digest");
|
196
|
-
cDigest_Base = rb_path2class("Digest::Base");
|
197
|
-
|
198
|
-
cDigest_MurmurHash = rb_define_class_under(mDigest, "MurmurHash", cDigest_Base);
|
199
|
-
|
200
|
-
rb_define_alloc_func(cDigest_MurmurHash, murmur_alloc);
|
201
|
-
rb_define_method(cDigest_MurmurHash, "initialize_copy", murmur_initialize_copy, 1);
|
202
|
-
rb_define_method(cDigest_MurmurHash, "reset", murmur_reset, 0);
|
203
|
-
rb_define_method(cDigest_MurmurHash, "update", murmur_update, 1);
|
204
|
-
rb_define_private_method(cDigest_MurmurHash, "finish", murmur_finish, 0);
|
205
|
-
rb_define_method(cDigest_MurmurHash, "digest_length", murmur_digest_length, 0);
|
206
|
-
rb_define_method(cDigest_MurmurHash, "block_length", murmur_block_length, 0);
|
207
|
-
|
208
|
-
rb_define_method(cDigest_MurmurHash, "to_i", murmur_to_i, 0);
|
209
|
-
}
|