digest-murmurhash 0.1.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -8
- data/Rakefile +1 -1
- data/digest-murmurhash.gemspec +6 -6
- data/ext/digest/murmurhash/extconf.rb +1 -1
- data/ext/digest/murmurhash/init.c +34 -0
- data/ext/digest/murmurhash/murmurhash.h +23 -0
- data/ext/digest/murmurhash/murmurhash1.c +86 -0
- data/ext/digest/murmurhash/murmurhash1.h +11 -0
- data/ext/digest/murmurhash/murmurhash2.c +90 -0
- data/ext/digest/murmurhash/murmurhash2.h +11 -0
- data/ext/digest/murmurhash/murmurhash2a.c +84 -0
- data/ext/digest/murmurhash/murmurhash2a.h +11 -0
- data/lib/digest/murmurhash.rb +6 -0
- data/lib/digest/murmurhash/version.rb +1 -1
- data/spec/bench.rb +83 -16
- data/spec/digest_spec.rb +57 -35
- data/spec/exception_spec.rb +20 -0
- data/spec/mem_spec.rb +12 -6
- data/spec/spec_helper.rb +1 -0
- metadata +34 -10
- data/ext/digest/murmurhash/murmurhash.c +0 -209
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 635a6c34802609c512789d68c9ebe571df48cc01
|
4
|
+
data.tar.gz: 1a1884fb59d6a9ccfca3c5da969e688589c093f0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2633494b6b099a2149deeb3c39af1eb81e80435156bc459b4aee9498ca5209f41ac5a400713b6ae6c762935c5a1d964bc408e63a6e21e0634778d4a78c63f329
|
7
|
+
data.tar.gz: 9c61561046045532d0c8fe863059625cb532d051ea978f16fc63b0bd7fada9fded44900d91642695253d50fd94344c9444d35d5e6715b2956c2078713bcd9fd2
|
data/README.md
CHANGED
@@ -2,26 +2,39 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/ksss/digest-murmurhash.png?branch=master)](https://travis-ci.org/ksss/digest-murmurhash)
|
4
4
|
|
5
|
-
|
5
|
+
**MurmurHash** is an algorithm designed by Austin Appleby.
|
6
6
|
|
7
|
-
Digest::
|
7
|
+
**Digest::MurmurHash1** uses the MurmurHash1 (32-bit) algorithm.
|
8
8
|
|
9
|
-
|
9
|
+
**Digest::MurmurHash2** uses the MurmurHash2 (32-bit) algorithm.
|
10
|
+
|
11
|
+
(**Digest::MurmurHash** exists only to hold the version number.)
|
10
12
|
|
11
|
-
All
|
13
|
+
All classes comply with Ruby's Digest API.
|
14
|
+
|
15
|
+
## Usage
|
12
16
|
|
13
|
-
You can use same interface built
|
17
|
+
You can use the same interface as the built-in Digest::XXX classes.
|
14
18
|
|
15
19
|
```ruby
|
16
20
|
require 'digest/murmurhash'
|
17
21
|
|
18
|
-
|
19
|
-
|
22
|
+
# MurmurHash1 can be used in the same way as Digest::XXX.
|
23
|
+
|
24
|
+
p Digest::MurmurHash1.hexdigest('murmurhash') #=> 'c709abd5'
|
25
|
+
p Digest::MurmurHash1.file("./LICENSE.txt").hexdigest #=> '712e9641'
|
26
|
+
|
27
|
+
# MurmurHash2 works the same way, but returns a different value because it uses a different algorithm.
|
28
|
+
|
29
|
+
p Digest::MurmurHash2.hexdigest('murmurhash') #=> '33f67c7e'
|
30
|
+
p Digest::MurmurHash2.file("./LICENSE.txt").hexdigest #=> '78678326'
|
20
31
|
```
|
21
32
|
|
22
33
|
## Class tree
|
23
34
|
|
24
|
-
Digest::
|
35
|
+
**Digest::MurmurHash1** < Digest::StringBuffer
|
36
|
+
|
37
|
+
**Digest::MurmurHash2** < Digest::StringBuffer
|
25
38
|
|
26
39
|
## Installation
|
27
40
|
|
data/Rakefile
CHANGED
data/digest-murmurhash.gemspec
CHANGED
@@ -1,16 +1,15 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
lib = File.expand_path('../lib', __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
|
4
|
+
require 'digest/murmurhash/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "digest-murmurhash"
|
8
|
-
|
9
|
-
spec.version = "0.1.0"
|
8
|
+
spec.version = Digest::MurmurHash::VERSION
|
10
9
|
spec.author = "ksss"
|
11
10
|
spec.email = "co000ri@gmail.com"
|
12
|
-
spec.description = %q{Digest::
|
13
|
-
spec.summary = %q{Digest::
|
11
|
+
spec.description = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
|
12
|
+
spec.summary = %q{Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged by Austin Appleby.}
|
14
13
|
spec.homepage = ""
|
15
14
|
spec.license = "MIT"
|
16
15
|
|
@@ -23,5 +22,6 @@ Gem::Specification.new do |spec|
|
|
23
22
|
spec.add_development_dependency "bundler", "~> 1.3"
|
24
23
|
spec.add_development_dependency "rake"
|
25
24
|
spec.add_development_dependency "rspec", ['~> 2.11']
|
26
|
-
spec.add_development_dependency "rake-compiler", ["~> 0.
|
25
|
+
spec.add_development_dependency "rake-compiler", ["~> 0.9.2"]
|
26
|
+
spec.add_development_dependency "digest-stringbuffer", ["~> 0.0.2"]
|
27
27
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
#include "murmurhash.h"
|
2
|
+
#include "murmurhash1.h"
|
3
|
+
#include "murmurhash2.h"
|
4
|
+
#include "murmurhash2a.h"
|
5
|
+
|
6
|
+
void
|
7
|
+
Init_murmurhash(void)
|
8
|
+
{
|
9
|
+
VALUE mDigest, cDigest_StringBuffer;
|
10
|
+
VALUE cDigest_MurmurHash1, cDigest_MurmurHash2, cDigest_MurmurHash2A;
|
11
|
+
|
12
|
+
/* Digest::MurmurHash is require that Digest::StringBuffer */
|
13
|
+
rb_require("digest/stringbuffer");
|
14
|
+
mDigest = rb_path2class("Digest");
|
15
|
+
cDigest_StringBuffer = rb_path2class("Digest::StringBuffer");
|
16
|
+
|
17
|
+
/* class Digest::MurmurHash1 < Digest::StringBuffer */
|
18
|
+
cDigest_MurmurHash1 = rb_define_class_under(mDigest, "MurmurHash1", cDigest_StringBuffer);
|
19
|
+
rb_define_private_method(cDigest_MurmurHash1, "finish", murmur1_finish, 0);
|
20
|
+
rb_define_method(cDigest_MurmurHash1, "to_i", murmur1_to_i, 0);
|
21
|
+
rb_define_singleton_method(cDigest_MurmurHash1, "rawdigest", murmur1_s_rawdigest, -1);
|
22
|
+
|
23
|
+
/* class Digest::MurmurHash2 < Digest::StringBuffer */
|
24
|
+
cDigest_MurmurHash2 = rb_define_class_under(mDigest, "MurmurHash2", cDigest_StringBuffer);
|
25
|
+
rb_define_private_method(cDigest_MurmurHash2, "finish", murmur2_finish, 0);
|
26
|
+
rb_define_method(cDigest_MurmurHash2, "to_i", murmur2_to_i, 0);
|
27
|
+
rb_define_singleton_method(cDigest_MurmurHash2, "rawdigest", murmur2_s_rawdigest, -1);
|
28
|
+
|
29
|
+
/* class Digest::MurmurHash2A < Digest::StringBuffer */
|
30
|
+
cDigest_MurmurHash2A = rb_define_class_under(mDigest, "MurmurHash2A", cDigest_StringBuffer);
|
31
|
+
rb_define_private_method(cDigest_MurmurHash2A, "finish", murmur2a_finish, 0);
|
32
|
+
rb_define_method(cDigest_MurmurHash2A, "to_i", murmur2a_to_i, 0);
|
33
|
+
rb_define_singleton_method(cDigest_MurmurHash2A, "rawdigest", murmur2a_s_rawdigest, -1);
|
34
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#ifndef MURMURHASH_INCLUDED
|
2
|
+
# define MURMURHASH_INCLUDED
|
3
|
+
|
4
|
+
#include "ruby.h"
|
5
|
+
|
6
|
+
#define MURMURHASH_MAGIC 0x5bd1e995
|
7
|
+
|
8
|
+
/* should be same type structure to digest/stringbuffer */
|
9
|
+
typedef struct {
|
10
|
+
char* buffer;
|
11
|
+
char* p;
|
12
|
+
size_t memsize;
|
13
|
+
} buffer_t;
|
14
|
+
|
15
|
+
#define MURMURHASH(self, name) \
|
16
|
+
buffer_t* name; \
|
17
|
+
Data_Get_Struct(self, buffer_t, name); \
|
18
|
+
if (name == NULL) { \
|
19
|
+
rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
|
20
|
+
}
|
21
|
+
|
22
|
+
#endif /* ifndef MURMURHASH_INCLUDED */
|
23
|
+
|
@@ -0,0 +1,86 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHash1 (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash1.h"
|
6
|
+
|
7
|
+
static inline size_t
|
8
|
+
murmur1(uint32_t h, const uint8_t r)
|
9
|
+
{
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
h *= m;
|
12
|
+
h ^= h >> r;
|
13
|
+
return h;
|
14
|
+
}
|
15
|
+
|
16
|
+
static uint32_t
|
17
|
+
murmur_hash_process1(const char *data, uint32_t length)
|
18
|
+
{
|
19
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
20
|
+
const uint8_t r = 16;
|
21
|
+
uint32_t h;
|
22
|
+
|
23
|
+
h = length * m;
|
24
|
+
|
25
|
+
while (4 <= length) {
|
26
|
+
h += *(uint32_t*)data;
|
27
|
+
h = murmur1(h, r);
|
28
|
+
data += 4;
|
29
|
+
length -= 4;
|
30
|
+
}
|
31
|
+
|
32
|
+
switch (length) {
|
33
|
+
case 3:
|
34
|
+
h += data[2] << 16;
|
35
|
+
case 2:
|
36
|
+
h += data[1] << 8;
|
37
|
+
case 1:
|
38
|
+
h += data[0];
|
39
|
+
h = murmur1(h, r);
|
40
|
+
}
|
41
|
+
|
42
|
+
h = murmur1(h, 10);
|
43
|
+
h = murmur1(h, 17);
|
44
|
+
|
45
|
+
return h;
|
46
|
+
}
|
47
|
+
|
48
|
+
VALUE
|
49
|
+
murmur1_finish(VALUE self)
|
50
|
+
{
|
51
|
+
uint32_t h;
|
52
|
+
uint8_t digest[4];
|
53
|
+
MURMURHASH(self, ptr);
|
54
|
+
|
55
|
+
h = murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer);
|
56
|
+
|
57
|
+
digest[0] = h >> 24;
|
58
|
+
digest[1] = h >> 16;
|
59
|
+
digest[2] = h >> 8;
|
60
|
+
digest[3] = h;
|
61
|
+
|
62
|
+
return rb_str_new((const char*) digest, 4);
|
63
|
+
}
|
64
|
+
|
65
|
+
VALUE
|
66
|
+
murmur1_to_i(VALUE self)
|
67
|
+
{
|
68
|
+
MURMURHASH(self, ptr);
|
69
|
+
return UINT2NUM(murmur_hash_process1(ptr->buffer, ptr->p - ptr->buffer));
|
70
|
+
}
|
71
|
+
|
72
|
+
VALUE
|
73
|
+
murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
74
|
+
{
|
75
|
+
VALUE str;
|
76
|
+
|
77
|
+
if (argc < 1)
|
78
|
+
rb_raise(rb_eArgError, "no data given");
|
79
|
+
|
80
|
+
str = *argv++;
|
81
|
+
argc--;
|
82
|
+
|
83
|
+
StringValue(str);
|
84
|
+
|
85
|
+
return UINT2NUM(murmur_hash_process1(RSTRING_PTR(str), RSTRING_LEN(str)));
|
86
|
+
}
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef MURMURHASH1_INCLUDED
|
2
|
+
# define MURMURHASH1_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur1_finish(VALUE self);
|
7
|
+
VALUE murmur1_to_i(VALUE self);
|
8
|
+
VALUE murmur1_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
|
10
|
+
#endif /* ifndef MURMURHASH1_INCLUDED */
|
11
|
+
|
@@ -0,0 +1,90 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHash2 (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash2.h"
|
6
|
+
|
7
|
+
static inline size_t
|
8
|
+
murmur2(uint32_t h, uint32_t k, const uint8_t r)
|
9
|
+
{
|
10
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
11
|
+
k *= m;
|
12
|
+
k ^= k >> r;
|
13
|
+
k *= m;
|
14
|
+
|
15
|
+
h *= m;
|
16
|
+
h ^= k;
|
17
|
+
return h;
|
18
|
+
}
|
19
|
+
|
20
|
+
static uint32_t
|
21
|
+
murmur_hash_process2(const char *data, uint32_t length)
|
22
|
+
{
|
23
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
24
|
+
const uint8_t r = 24;
|
25
|
+
uint32_t h, k;
|
26
|
+
|
27
|
+
h = length * m;
|
28
|
+
|
29
|
+
while (4 <= length) {
|
30
|
+
k = *(uint32_t*)data;
|
31
|
+
h = murmur2(h, k, r);
|
32
|
+
data += 4;
|
33
|
+
length -= 4;
|
34
|
+
}
|
35
|
+
|
36
|
+
switch (length) {
|
37
|
+
case 3: h ^= data[2] << 16;
|
38
|
+
case 2: h ^= data[1] << 8;
|
39
|
+
case 1: h ^= data[0];
|
40
|
+
h *= m;
|
41
|
+
}
|
42
|
+
|
43
|
+
h ^= h >> 13;
|
44
|
+
h *= m;
|
45
|
+
h ^= h >> 15;
|
46
|
+
|
47
|
+
return h;
|
48
|
+
}
|
49
|
+
|
50
|
+
VALUE
|
51
|
+
murmur2_finish(VALUE self)
|
52
|
+
{
|
53
|
+
uint32_t h;
|
54
|
+
uint8_t digest[4];
|
55
|
+
MURMURHASH(self, ptr);
|
56
|
+
|
57
|
+
h = murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer);
|
58
|
+
|
59
|
+
digest[0] = h >> 24;
|
60
|
+
digest[1] = h >> 16;
|
61
|
+
digest[2] = h >> 8;
|
62
|
+
digest[3] = h;
|
63
|
+
|
64
|
+
return rb_str_new((const char*) digest, 4);
|
65
|
+
}
|
66
|
+
|
67
|
+
VALUE
|
68
|
+
murmur2_to_i(VALUE self)
|
69
|
+
{
|
70
|
+
MURMURHASH(self, ptr);
|
71
|
+
return UINT2NUM(murmur_hash_process2(ptr->buffer, ptr->p - ptr->buffer));
|
72
|
+
}
|
73
|
+
|
74
|
+
VALUE
|
75
|
+
murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
76
|
+
{
|
77
|
+
VALUE str;
|
78
|
+
volatile VALUE obj;
|
79
|
+
|
80
|
+
if (argc < 1)
|
81
|
+
rb_raise(rb_eArgError, "no data given");
|
82
|
+
|
83
|
+
str = *argv++;
|
84
|
+
argc--;
|
85
|
+
|
86
|
+
StringValue(str);
|
87
|
+
|
88
|
+
return UINT2NUM(murmur_hash_process2(RSTRING_PTR(str), RSTRING_LEN(str)));
|
89
|
+
}
|
90
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef MURMURHASH2_INCLUDED
|
2
|
+
# define MURMURHASH2_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur2_finish(VALUE self);
|
7
|
+
VALUE murmur2_to_i(VALUE self);
|
8
|
+
VALUE murmur2_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
|
10
|
+
#endif /* ifndef MURMURHASH2_INCLUDED */
|
11
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
/*
|
2
|
+
* MurmurHash2A (C) Austin Appleby
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "murmurhash2a.h"
|
6
|
+
|
7
|
+
#define mmix(h,k) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }
|
8
|
+
|
9
|
+
static uint32_t
|
10
|
+
murmur_hash_process2a(const void *key, uint32_t length)
|
11
|
+
{
|
12
|
+
const uint32_t m = MURMURHASH_MAGIC;
|
13
|
+
const uint8_t r = 24;
|
14
|
+
uint32_t h, k, t, l;
|
15
|
+
const unsigned char *data = (const unsigned char *) key;
|
16
|
+
|
17
|
+
l = length;
|
18
|
+
h = 0 ^ length;
|
19
|
+
|
20
|
+
while (4 <= length) {
|
21
|
+
k = *(uint32_t*)data;
|
22
|
+
mmix(h,k);
|
23
|
+
data += 4;
|
24
|
+
length -= 4;
|
25
|
+
}
|
26
|
+
|
27
|
+
t = 0;
|
28
|
+
switch (length) {
|
29
|
+
case 3: t ^= data[2] << 16;
|
30
|
+
case 2: t ^= data[1] << 8;
|
31
|
+
case 1: t ^= data[0];
|
32
|
+
}
|
33
|
+
|
34
|
+
mmix(h,t);
|
35
|
+
mmix(h,l);
|
36
|
+
|
37
|
+
h ^= h >> 13;
|
38
|
+
h *= m;
|
39
|
+
h ^= h >> 15;
|
40
|
+
|
41
|
+
return h;
|
42
|
+
}
|
43
|
+
|
44
|
+
VALUE
|
45
|
+
murmur2a_finish(VALUE self)
|
46
|
+
{
|
47
|
+
uint32_t h;
|
48
|
+
uint8_t digest[4];
|
49
|
+
MURMURHASH(self, ptr);
|
50
|
+
|
51
|
+
h = murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer);
|
52
|
+
|
53
|
+
digest[0] = h >> 24;
|
54
|
+
digest[1] = h >> 16;
|
55
|
+
digest[2] = h >> 8;
|
56
|
+
digest[3] = h;
|
57
|
+
|
58
|
+
return rb_str_new((const char*) digest, 4);
|
59
|
+
}
|
60
|
+
|
61
|
+
VALUE
|
62
|
+
murmur2a_to_i(VALUE self)
|
63
|
+
{
|
64
|
+
MURMURHASH(self, ptr);
|
65
|
+
return UINT2NUM(murmur_hash_process2a(ptr->buffer, ptr->p - ptr->buffer));
|
66
|
+
}
|
67
|
+
|
68
|
+
VALUE
|
69
|
+
murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass)
|
70
|
+
{
|
71
|
+
VALUE str;
|
72
|
+
volatile VALUE obj;
|
73
|
+
|
74
|
+
if (argc < 1)
|
75
|
+
rb_raise(rb_eArgError, "no data given");
|
76
|
+
|
77
|
+
str = *argv++;
|
78
|
+
argc--;
|
79
|
+
|
80
|
+
StringValue(str);
|
81
|
+
|
82
|
+
return UINT2NUM(murmur_hash_process2a(RSTRING_PTR(str), RSTRING_LEN(str)));
|
83
|
+
}
|
84
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
#ifndef MURMURHASH2A_INCLUDED
|
2
|
+
# define MURMURHASH2A_INCLUDED
|
3
|
+
|
4
|
+
#include "murmurhash.h"
|
5
|
+
|
6
|
+
VALUE murmur2a_finish(VALUE self);
|
7
|
+
VALUE murmur2a_to_i(VALUE self);
|
8
|
+
VALUE murmur2a_s_rawdigest(int argc, VALUE *argv, VALUE klass);
|
9
|
+
|
10
|
+
#endif /* ifndef MURMURHASH2A_INCLUDED */
|
11
|
+
|
data/spec/bench.rb
CHANGED
@@ -3,9 +3,24 @@
|
|
3
3
|
lib = File.expand_path('../../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
+
require 'digest/stringbuffer'
|
6
7
|
require 'digest/murmurhash'
|
7
8
|
require 'benchmark'
|
8
9
|
|
10
|
+
class Prime37 < Digest::StringBuffer
|
11
|
+
def initialize
|
12
|
+
@prime = 37
|
13
|
+
end
|
14
|
+
|
15
|
+
def finish
|
16
|
+
result = 0
|
17
|
+
buffer.unpack("C*").each do |c|
|
18
|
+
result += (c * @prime)
|
19
|
+
end
|
20
|
+
[result & 0xffffffff].pack("N")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
9
24
|
class Integer
|
10
25
|
def to_32
|
11
26
|
self & 0xffffffff
|
@@ -44,30 +59,82 @@ def murmur_hash str
|
|
44
59
|
h = (h * m).to_32
|
45
60
|
h ^= h >> 17
|
46
61
|
|
47
|
-
h
|
62
|
+
h
|
48
63
|
end
|
49
64
|
|
65
|
+
@rands = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789".split(//)
|
50
66
|
def rand_str
|
51
67
|
rand = "";
|
52
|
-
|
68
|
+
20.times {
|
69
|
+
rand << @rands[rand(62)]
|
70
|
+
}
|
53
71
|
rand
|
54
72
|
end
|
55
73
|
|
56
|
-
|
57
|
-
|
58
|
-
|
74
|
+
n = 100000
|
75
|
+
times_enum = n.times
|
76
|
+
|
77
|
+
a = Array.new(n, 0)
|
78
|
+
n.times do |i|
|
79
|
+
a[i] = rand_str
|
80
|
+
end
|
81
|
+
|
82
|
+
c = Struct.new "Cases",
|
83
|
+
:name,
|
84
|
+
:func
|
85
|
+
cases = [
|
86
|
+
c.new("pureRuby", proc{|x| murmur_hash x }),
|
87
|
+
c.new("Prime37", proc{|x| Prime37.digest x }),
|
88
|
+
c.new("MurmurHash1", proc{|x| Digest::MurmurHash1.rawdigest x }),
|
89
|
+
c.new("MurmurHash2", proc{|x| Digest::MurmurHash2.rawdigest x }),
|
90
|
+
c.new("MurmurHash2A", proc{|x| Digest::MurmurHash2A.rawdigest x }),
|
91
|
+
]
|
92
|
+
|
93
|
+
reals = {}
|
94
|
+
confrict = {}
|
95
|
+
confricts = {}
|
59
96
|
|
97
|
+
puts "### condition"
|
98
|
+
puts
|
99
|
+
puts " RUBY_VERSION = #{RUBY_VERSION}"
|
100
|
+
puts " count = #{n}"
|
101
|
+
puts
|
102
|
+
puts "### benchmark"
|
103
|
+
puts
|
104
|
+
puts "```"
|
60
105
|
Benchmark.bm do |x|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
106
|
+
cases.each do |c|
|
107
|
+
z = x.report c.name do
|
108
|
+
times_enum.each do |i|
|
109
|
+
c.func.call(a[i])
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
confrict.clear
|
114
|
+
times_enum.each do |i|
|
115
|
+
rethash = c.func.call(a[i])
|
116
|
+
if confrict[rethash].nil?
|
117
|
+
confrict[rethash] = 0
|
118
|
+
else
|
119
|
+
confrict[rethash] += 1
|
120
|
+
end
|
121
|
+
end
|
122
|
+
reals[c.name] = z.real
|
123
|
+
confricts[c.name] = confrict.count{|hash, count| 0 < count}
|
124
|
+
end
|
125
|
+
end
|
126
|
+
puts "```"
|
127
|
+
|
128
|
+
puts
|
129
|
+
puts "### real second rate (pureRuby/)"
|
130
|
+
puts
|
131
|
+
reals.each do |name, real|
|
132
|
+
puts " " + (reals["pureRuby"] / real).to_s + "/" + name
|
133
|
+
end
|
66
134
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
}}
|
135
|
+
puts
|
136
|
+
puts "### confrict count (/#{n})"
|
137
|
+
puts
|
138
|
+
confricts.each do |name, count|
|
139
|
+
puts " #{name}: #{count}"
|
73
140
|
end
|
data/spec/digest_spec.rb
CHANGED
@@ -1,60 +1,82 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe Digest::
|
4
|
-
let :
|
5
|
-
|
3
|
+
describe "Digest::MurmurHash1 and 2" do
|
4
|
+
let :all do
|
5
|
+
[MurmurHash1, MurmurHash2, MurmurHash2A]
|
6
6
|
end
|
7
7
|
|
8
8
|
it "initialize" do
|
9
|
-
expect(
|
9
|
+
expect(MurmurHash1.new).to be_a_kind_of(Digest::StringBuffer)
|
10
|
+
expect(MurmurHash2.new).to be_a_kind_of(Digest::StringBuffer)
|
11
|
+
expect(MurmurHash2A.new).to be_a_kind_of(Digest::StringBuffer)
|
12
|
+
end
|
13
|
+
|
14
|
+
it "digest" do
|
15
|
+
expect(MurmurHash1.digest("a" * 1024)).to eq("\xa1\x52\x2e\x5f".force_encoding("ASCII-8BIT"))
|
16
|
+
expect(MurmurHash2.digest("a" * 1024)).to eq("\xd0\x0c\x31\x2f".force_encoding("ASCII-8BIT"))
|
17
|
+
expect(MurmurHash2A.digest("a" * 1024)).to eq("\xd5\x2d\xb1\x67".force_encoding("ASCII-8BIT"))
|
10
18
|
end
|
11
19
|
|
12
20
|
it "hexdigest" do
|
13
|
-
expect(
|
21
|
+
expect(MurmurHash1.hexdigest("a" * 1024)).to eq("a1522e5f")
|
22
|
+
expect(MurmurHash2.hexdigest("a" * 1024)).to eq("d00c312f")
|
23
|
+
expect(MurmurHash2A.hexdigest("a" * 1024)).to eq("d52db167")
|
14
24
|
end
|
15
25
|
|
16
|
-
it "
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
murmur.update("m")
|
21
|
-
murmur.update("u")
|
22
|
-
murmur.update("r")
|
23
|
-
murmur.update("hash")
|
24
|
-
expect(murmur.hexdigest).to eq("c709abd5");
|
25
|
-
expect(murmur.hexdigest).to eq("c709abd5");
|
26
|
-
expect(murmur.hexdigest!).to eq("c709abd5");
|
27
|
-
expect(murmur.hexdigest).to eq("00000000");
|
26
|
+
it "rawdigest" do
|
27
|
+
expect(MurmurHash1.rawdigest("a" * 1024)).to eq(0xa1522e5f)
|
28
|
+
expect(MurmurHash2.rawdigest("a" * 1024)).to eq(0xd00c312f)
|
29
|
+
expect(MurmurHash2A.rawdigest("a" * 1024)).to eq(0xd52db167)
|
28
30
|
end
|
29
31
|
|
30
|
-
it "
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
it "update and reset and hexdigest" do
|
33
|
+
{
|
34
|
+
MurmurHash1 => "c709abd5",
|
35
|
+
MurmurHash2 => "33f67c7e",
|
36
|
+
MurmurHash2A => "df25554b",
|
37
|
+
}.each do |c, should|
|
38
|
+
murmur = c.new
|
39
|
+
murmur.update("m").update("u").update("r")
|
40
|
+
murmur << "m" << "u" << "r"
|
41
|
+
murmur << "hash"
|
42
|
+
expect(murmur.hexdigest).to eq(should);
|
43
|
+
expect(murmur.hexdigest).to eq(should);
|
44
|
+
expect(murmur.hexdigest!).to eq(should);
|
45
|
+
expect(murmur.hexdigest).to eq("00000000");
|
46
|
+
end
|
37
47
|
end
|
38
48
|
|
39
49
|
it "==" do
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
50
|
+
all.each do |c|
|
51
|
+
["", "murmur", "murmurhash" * 1024].each do |str|
|
52
|
+
murmur1 = c.new
|
53
|
+
murmur2 = c.new
|
54
|
+
expect(murmur1.update(str) == murmur2.update(str)).to be_true
|
55
|
+
end
|
44
56
|
end
|
45
57
|
end
|
46
58
|
|
47
|
-
it "
|
48
|
-
|
59
|
+
it "dup" do
|
60
|
+
all.each do |c|
|
61
|
+
murmur1 = c.new
|
62
|
+
murmur2 = c.new
|
63
|
+
10.times {
|
64
|
+
murmur1 = murmur1.update("murmurhash" * 100).dup
|
65
|
+
}
|
66
|
+
murmur2.update(("murmurhash" * 100) * 10)
|
67
|
+
expect(murmur1 == murmur2).to be_true
|
68
|
+
end
|
49
69
|
end
|
50
70
|
|
51
|
-
it "
|
52
|
-
|
53
|
-
|
54
|
-
expect(
|
71
|
+
it "length" do
|
72
|
+
expect(MurmurHash1.new.length).to eq(4);
|
73
|
+
expect(MurmurHash2.new.length).to eq(4);
|
74
|
+
expect(MurmurHash2A.new.length).to eq(4);
|
55
75
|
end
|
56
76
|
|
57
77
|
it "to_i" do
|
58
|
-
expect(
|
78
|
+
expect(MurmurHash1.new.update("murmurhash").to_i).to eq(0xc709abd5);
|
79
|
+
expect(MurmurHash2.new.update("murmurhash").to_i).to eq(0x33f67c7e);
|
80
|
+
expect(MurmurHash2A.new.update("murmurhash").to_i).to eq(0xdf25554b);
|
59
81
|
end
|
60
82
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe MurmurHash do
|
4
|
+
let :all do
|
5
|
+
[MurmurHash1, MurmurHash2, MurmurHash2A]
|
6
|
+
end
|
7
|
+
|
8
|
+
it "update nil" do
|
9
|
+
all.each do |c|
|
10
|
+
murmur = c.new
|
11
|
+
expect{ murmur.update }.to raise_error(ArgumentError)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
it "rawdigest no arguments" do
|
16
|
+
all.each do |c|
|
17
|
+
expect{ c.rawdigest }.to raise_error(ArgumentError)
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/spec/mem_spec.rb
CHANGED
@@ -1,12 +1,18 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe MurmurHash do
|
4
4
|
it "gc safe" do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
5
|
+
{
|
6
|
+
MurmurHash1 => "c709abd5",
|
7
|
+
MurmurHash2 => "33f67c7e",
|
8
|
+
MurmurHash2A => "df25554b",
|
9
|
+
}.each do |c, should|
|
10
|
+
murmur = c.new
|
11
|
+
GC.start
|
12
|
+
murmur.update("murmur")
|
13
|
+
GC.start
|
14
|
+
expect(murmur.update("hash").to_s).to eq(should);
|
15
|
+
end
|
10
16
|
end
|
11
17
|
end
|
12
18
|
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digest-murmurhash
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ksss
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -58,16 +58,30 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
61
|
+
version: 0.9.2
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ~>
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
69
|
-
|
70
|
-
|
68
|
+
version: 0.9.2
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: digest-stringbuffer
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ~>
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.0.2
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ~>
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.0.2
|
83
|
+
description: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
|
84
|
+
by Austin Appleby.
|
71
85
|
email: co000ri@gmail.com
|
72
86
|
executables: []
|
73
87
|
extensions:
|
@@ -82,10 +96,19 @@ files:
|
|
82
96
|
- Rakefile
|
83
97
|
- digest-murmurhash.gemspec
|
84
98
|
- ext/digest/murmurhash/extconf.rb
|
85
|
-
- ext/digest/murmurhash/
|
99
|
+
- ext/digest/murmurhash/init.c
|
100
|
+
- ext/digest/murmurhash/murmurhash.h
|
101
|
+
- ext/digest/murmurhash/murmurhash1.c
|
102
|
+
- ext/digest/murmurhash/murmurhash1.h
|
103
|
+
- ext/digest/murmurhash/murmurhash2.c
|
104
|
+
- ext/digest/murmurhash/murmurhash2.h
|
105
|
+
- ext/digest/murmurhash/murmurhash2a.c
|
106
|
+
- ext/digest/murmurhash/murmurhash2a.h
|
107
|
+
- lib/digest/murmurhash.rb
|
86
108
|
- lib/digest/murmurhash/version.rb
|
87
109
|
- spec/bench.rb
|
88
110
|
- spec/digest_spec.rb
|
111
|
+
- spec/exception_spec.rb
|
89
112
|
- spec/mem_spec.rb
|
90
113
|
- spec/spec_helper.rb
|
91
114
|
homepage: ''
|
@@ -108,13 +131,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
131
|
version: '0'
|
109
132
|
requirements: []
|
110
133
|
rubyforge_project:
|
111
|
-
rubygems_version: 2.1.
|
134
|
+
rubygems_version: 2.1.11
|
112
135
|
signing_key:
|
113
136
|
specification_version: 4
|
114
|
-
summary: Digest::
|
115
|
-
|
137
|
+
summary: Digest::MurmurHash1 is a class of use algorithm MurmurHash1(32-bit) desiged
|
138
|
+
by Austin Appleby.
|
116
139
|
test_files:
|
117
140
|
- spec/bench.rb
|
118
141
|
- spec/digest_spec.rb
|
142
|
+
- spec/exception_spec.rb
|
119
143
|
- spec/mem_spec.rb
|
120
144
|
- spec/spec_helper.rb
|
@@ -1,209 +0,0 @@
|
|
1
|
-
#include "ruby.h"
|
2
|
-
#ifdef HAVE_RUBY_DIGEST_H
|
3
|
-
#include "ruby/digest.h"
|
4
|
-
#else
|
5
|
-
#include "digest.h"
|
6
|
-
#endif
|
7
|
-
|
8
|
-
#define MURMURHASH_DIGEST_LENGTH 4
|
9
|
-
#define MURMURHASH_BLOCK_LENGTH 4
|
10
|
-
|
11
|
-
|
12
|
-
ID id_concat;
|
13
|
-
|
14
|
-
typedef struct {
|
15
|
-
char* data;
|
16
|
-
char* p;
|
17
|
-
size_t memsize;
|
18
|
-
} murmur_t;
|
19
|
-
|
20
|
-
#define MURMURHASH(self, name) \
|
21
|
-
murmur_t* name; \
|
22
|
-
Data_Get_Struct(self, murmur_t, name); \
|
23
|
-
if (name == NULL) { \
|
24
|
-
rb_raise(rb_eArgError, "NULL found for " # name " when shouldn't be.'"); \
|
25
|
-
}
|
26
|
-
|
27
|
-
static void
|
28
|
-
murmur_init(murmur_t* ptr)
|
29
|
-
{
|
30
|
-
ptr->data = (char*) malloc(sizeof(char) * 64);
|
31
|
-
ptr->p = ptr->data;
|
32
|
-
ptr->memsize = 64;
|
33
|
-
}
|
34
|
-
|
35
|
-
static void
|
36
|
-
murmur_mark(murmur_t* ptr)
|
37
|
-
{
|
38
|
-
}
|
39
|
-
|
40
|
-
static void
|
41
|
-
murmur_free(murmur_t* ptr)
|
42
|
-
{
|
43
|
-
free(ptr->data);
|
44
|
-
}
|
45
|
-
|
46
|
-
static VALUE
|
47
|
-
murmur_alloc(VALUE self)
|
48
|
-
{
|
49
|
-
murmur_t* ptr = ALLOC(murmur_t);
|
50
|
-
murmur_init(ptr);
|
51
|
-
return Data_Wrap_Struct(self, murmur_mark, murmur_free, ptr);
|
52
|
-
}
|
53
|
-
|
54
|
-
static VALUE
|
55
|
-
murmur_initialize_copy(VALUE copy, VALUE origin)
|
56
|
-
{
|
57
|
-
murmur_t *ptr_copy, *ptr_origin;
|
58
|
-
size_t data_len;
|
59
|
-
|
60
|
-
if (copy == origin) return copy;
|
61
|
-
|
62
|
-
rb_check_frozen(copy);
|
63
|
-
|
64
|
-
Data_Get_Struct(copy, murmur_t, ptr_copy);
|
65
|
-
Data_Get_Struct(origin, murmur_t, ptr_origin);
|
66
|
-
|
67
|
-
data_len = ptr_origin->p - ptr_origin->data;
|
68
|
-
ptr_copy->data = (char*) malloc(sizeof(char) * ptr_origin->memsize);
|
69
|
-
memcpy(ptr_copy->data, ptr_origin->data, data_len);
|
70
|
-
ptr_copy->p = ptr_copy->data + data_len;
|
71
|
-
ptr_copy->memsize = ptr_origin->memsize;
|
72
|
-
|
73
|
-
return copy;
|
74
|
-
}
|
75
|
-
|
76
|
-
static VALUE
|
77
|
-
murmur_reset(VALUE self)
|
78
|
-
{
|
79
|
-
MURMURHASH(self, ptr);
|
80
|
-
ptr->p = ptr->data;
|
81
|
-
return self;
|
82
|
-
}
|
83
|
-
|
84
|
-
static VALUE
|
85
|
-
murmur_update(VALUE self, VALUE str)
|
86
|
-
{
|
87
|
-
size_t data_len, str_len, require, newsize;
|
88
|
-
const char* str_p;
|
89
|
-
MURMURHASH(self, ptr);
|
90
|
-
|
91
|
-
StringValue(str);
|
92
|
-
str_p = RSTRING_PTR(str);
|
93
|
-
str_len = RSTRING_LEN(str);
|
94
|
-
data_len = (ptr->p - ptr->data);
|
95
|
-
require = data_len + str_len;
|
96
|
-
if (ptr->memsize < require) {
|
97
|
-
newsize = ptr->memsize;
|
98
|
-
while (newsize < require) {
|
99
|
-
newsize *= 2;
|
100
|
-
}
|
101
|
-
ptr->data = realloc(ptr->data, sizeof(char) * newsize);
|
102
|
-
ptr->p = ptr->data + data_len;
|
103
|
-
ptr->memsize = newsize;
|
104
|
-
}
|
105
|
-
memcpy(ptr->p, str_p, str_len);
|
106
|
-
ptr->p += str_len;
|
107
|
-
|
108
|
-
return self;
|
109
|
-
}
|
110
|
-
|
111
|
-
static uint32_t
|
112
|
-
murmur_hash_process(murmur_t* ptr)
|
113
|
-
{
|
114
|
-
const uint32_t m = 0x5bd1e995;
|
115
|
-
const uint8_t r = 16;
|
116
|
-
uint32_t length, h;
|
117
|
-
const char* p;
|
118
|
-
|
119
|
-
p = ptr->data;
|
120
|
-
length = ptr->p - ptr->data;
|
121
|
-
h = length * m;
|
122
|
-
|
123
|
-
while (4 <= length) {
|
124
|
-
h += *(uint32_t*)p;
|
125
|
-
h *= m;
|
126
|
-
h ^= h >> r;
|
127
|
-
p += 4;
|
128
|
-
length -= 4;
|
129
|
-
}
|
130
|
-
|
131
|
-
switch (length) {
|
132
|
-
case 3:
|
133
|
-
h += p[2] << 16;
|
134
|
-
case 2:
|
135
|
-
h += p[1] << 8;
|
136
|
-
case 1:
|
137
|
-
h += p[0];
|
138
|
-
h *= m;
|
139
|
-
h ^= h >> r;
|
140
|
-
}
|
141
|
-
|
142
|
-
h *= m;
|
143
|
-
h ^= h >> 10;
|
144
|
-
h *= m;
|
145
|
-
h ^= h >> 17;
|
146
|
-
|
147
|
-
return h;
|
148
|
-
}
|
149
|
-
|
150
|
-
static VALUE
|
151
|
-
murmur_finish(VALUE self)
|
152
|
-
{
|
153
|
-
uint32_t h;
|
154
|
-
uint8_t digest[MURMURHASH_DIGEST_LENGTH];
|
155
|
-
MURMURHASH(self, ptr);
|
156
|
-
|
157
|
-
h = murmur_hash_process(ptr);
|
158
|
-
|
159
|
-
digest[0] = (h >> 24);
|
160
|
-
digest[1] = (h >> 16);
|
161
|
-
digest[2] = (h >> 8);
|
162
|
-
digest[3] = (h);
|
163
|
-
|
164
|
-
return rb_str_new((const char*) digest, 4);
|
165
|
-
}
|
166
|
-
|
167
|
-
static VALUE
|
168
|
-
murmur_digest_length(VALUE self)
|
169
|
-
{
|
170
|
-
return INT2NUM(MURMURHASH_DIGEST_LENGTH);
|
171
|
-
}
|
172
|
-
|
173
|
-
static VALUE
|
174
|
-
murmur_block_length(VALUE self)
|
175
|
-
{
|
176
|
-
return INT2NUM(MURMURHASH_BLOCK_LENGTH);
|
177
|
-
}
|
178
|
-
|
179
|
-
static VALUE
|
180
|
-
murmur_to_i(VALUE self)
|
181
|
-
{
|
182
|
-
MURMURHASH(self, ptr);
|
183
|
-
return UINT2NUM(murmur_hash_process(ptr));
|
184
|
-
}
|
185
|
-
|
186
|
-
void
|
187
|
-
Init_murmurhash()
|
188
|
-
{
|
189
|
-
VALUE mDigest, cDigest_Base, cDigest_MurmurHash;
|
190
|
-
|
191
|
-
id_concat = rb_intern("concat");
|
192
|
-
|
193
|
-
rb_require("digest");
|
194
|
-
|
195
|
-
mDigest = rb_path2class("Digest");
|
196
|
-
cDigest_Base = rb_path2class("Digest::Base");
|
197
|
-
|
198
|
-
cDigest_MurmurHash = rb_define_class_under(mDigest, "MurmurHash", cDigest_Base);
|
199
|
-
|
200
|
-
rb_define_alloc_func(cDigest_MurmurHash, murmur_alloc);
|
201
|
-
rb_define_method(cDigest_MurmurHash, "initialize_copy", murmur_initialize_copy, 1);
|
202
|
-
rb_define_method(cDigest_MurmurHash, "reset", murmur_reset, 0);
|
203
|
-
rb_define_method(cDigest_MurmurHash, "update", murmur_update, 1);
|
204
|
-
rb_define_private_method(cDigest_MurmurHash, "finish", murmur_finish, 0);
|
205
|
-
rb_define_method(cDigest_MurmurHash, "digest_length", murmur_digest_length, 0);
|
206
|
-
rb_define_method(cDigest_MurmurHash, "block_length", murmur_block_length, 0);
|
207
|
-
|
208
|
-
rb_define_method(cDigest_MurmurHash, "to_i", murmur_to_i, 0);
|
209
|
-
}
|