pearson-hashing 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/benchmark/collisions.rb +6 -2
- data/benchmark/speed.rb +11 -3
- data/lib/pearson-hashing.rb +45 -5
- data/lib/pearson-hashing/version.rb +1 -1
- data/spec/lib/pearson_hashing_spec.rb +25 -3
- metadata +2 -2
data/benchmark/collisions.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require File.expand_path('../../lib/pearson-hashing', __FILE__)
|
4
4
|
|
5
|
-
SIZE =
|
5
|
+
SIZE = 512**2
|
6
6
|
|
7
7
|
def benchmark(name, &block)
|
8
8
|
puts "#{name}.."
|
@@ -21,11 +21,15 @@ end
|
|
21
21
|
# One `benchmark` run per hashing function; each block is handed a value
# (`str`) and returns the hash computed for it.
benchmark('PearsonHashing#digest8') { |str| PearsonHashing.digest8(str) }

benchmark('PearsonHashing#digest16') { |str| PearsonHashing.digest16(str) }

benchmark('PearsonHashing#digest32') { |str| PearsonHashing.digest32(str) }

# Ruby's built-in String#hash, run alongside the PearsonHashing digests.
benchmark('String#hash') { |str| str.hash }
|
data/benchmark/speed.rb
CHANGED
@@ -3,21 +3,29 @@
|
|
3
3
|
require 'benchmark'
|
4
4
|
require File.expand_path('../../lib/pearson-hashing', __FILE__)
|
5
5
|
|
6
|
+
SIZE = 512**2
|
7
|
+
|
6
8
|
Benchmark.bm do |b|
|
7
9
|
b.report 'PearsonHashing#digest8' do
|
8
|
-
|
10
|
+
SIZE.times do |i|
|
9
11
|
PearsonHashing.digest8 i.to_s
|
10
12
|
end
|
11
13
|
end
|
12
14
|
|
13
15
|
b.report 'PearsonHashing#digest16' do
|
14
|
-
|
16
|
+
SIZE.times do |i|
|
15
17
|
PearsonHashing.digest16 i.to_s
|
16
18
|
end
|
17
19
|
end
|
18
20
|
|
21
|
+
b.report 'PearsonHashing#digest32' do
|
22
|
+
SIZE.times do |i|
|
23
|
+
PearsonHashing.digest32 i.to_s
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
19
27
|
b.report 'String#hash' do
|
20
|
-
|
28
|
+
SIZE.times do |i|
|
21
29
|
i.to_s.hash
|
22
30
|
end
|
23
31
|
end
|
data/lib/pearson-hashing.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'pearson-hashing/version')
|
2
2
|
|
3
|
-
|
4
3
|
module PearsonHashing
|
5
4
|
TABLE = [49, 118, 63, 252, 13, 155, 114, 130, 137, 40, 210, 62, 219, 246, 136, 221,
|
6
5
|
174, 106, 37, 227, 166, 25, 139, 19, 204, 212, 64, 176, 70, 11, 170, 58,
|
@@ -36,6 +35,8 @@ module PearsonHashing
|
|
36
35
|
end
|
37
36
|
hash
|
38
37
|
end
|
38
|
+
|
39
|
+
# make #digest8 the default
|
39
40
|
class << self
|
40
41
|
alias :digest :digest8
|
41
42
|
end
|
@@ -47,9 +48,48 @@ module PearsonHashing
|
|
47
48
|
# @param [String] string
|
48
49
|
# @return [Fixnum] hash
|
49
50
|
def self.digest16(string)
  # Hash the input itself and a copy whose first byte was bumped by `shift`.
  inputs = [string, shift(string)]
  # Render every digest8 value as a zero-padded three-digit decimal field,
  # glue the fields together and read the result back as one integer.
  inputs.map { |input| '%03d' % PearsonHashing.digest8(input) }.join.to_i
end
|
56
|
+
|
57
|
+
# 32 bit hash
#
# NOTE(review): the value is assembled from three digest8 results (the input,
# the input shifted once and the input shifted twice), each rendered as a
# three-digit decimal field -- confirm that the "32 bit" label is intended.
#
# @param [String] string
# @return [Fixnum] hashvalue
def self.digest32(string)
  shifted_once = shift(string)
  shifted_twice = shift(shifted_once)
  inputs = [string, shifted_once, shifted_twice]
  # Zero-padded decimal fields are concatenated and parsed as one integer.
  inputs.map { |input| '%03d' % PearsonHashing.digest8(input) }.join.to_i
end
|
67
|
+
|
68
|
+
# Dynamic variant that combines an arbitrary number of digest8 rounds, which
# makes hashes longer than the hardcoded methods possible. It is known to be
# slower than those hardcoded methods.
#
# 1 byte  = 8 bit
# 2 bytes = 16 bit
# ...
#
# @param [String] string
# @param [Fixnum] no_of_bytes number of digest8 rounds to combine
# @return [Fixnum] hashvalue
def self.digest_with_x_bytes(string, no_of_bytes)
  current = string.dup
  # Every round yields one zero-padded three-digit field and then moves on to
  # the shifted string for the next round.
  fields = no_of_bytes.times.map do
    field = "%03d" % PearsonHashing.digest8(current)
    current = shift(current)
    field
  end
  fields.join.to_i
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
# Returns a copy of the given string whose first byte is incremented by one,
# wrapping from 255 back to 0. All other bytes are kept unchanged and the
# result carries the same encoding as the input. An empty string has no byte
# to increment and is returned as an (empty) copy.
#
# NOTE(review): a bare `private` does not apply to methods defined with
# `def self.`; `private_class_method :shift` would be needed to hide this
# helper -- confirm whether that is wanted.
#
# @param [String] str
# @return [String] str with first byte moved up
def self.shift(str)
  bytes = str.bytes.to_a
  return str.dup if bytes.empty?

  bytes[0] = (bytes[0] + 1) % 256
  # Pack as raw bytes ('C*') so that values >= 128 stay a single byte instead
  # of being expanded into a multi-byte UTF-8 sequence.
  bytes.pack('C*').force_encoding(str.encoding)
end
|
55
95
|
end
|
@@ -1,6 +1,12 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe PearsonHashing do
|
4
|
+
describe '#digest' do
  # #digest is meant to be interchangeable with #digest8.
  it 'should return the same as #digest8' do
    expected = PearsonHashing.digest8('foo')
    PearsonHashing.digest('foo').should == expected
  end
end
|
9
|
+
|
4
10
|
describe '#digest8' do
|
5
11
|
it 'should return 96 for "foo"' do
|
6
12
|
PearsonHashing.digest('foo').should == 96
|
@@ -13,9 +19,25 @@ describe PearsonHashing do
|
|
13
19
|
end
|
14
20
|
end
|
15
21
|
|
16
|
-
describe '#
|
17
|
-
it 'should return
|
18
|
-
PearsonHashing.
|
22
|
+
describe '#digest32' do
  # The description now states the value that is actually asserted.
  it 'should return 96099141 for "foo"' do
    PearsonHashing.digest32('foo').should == 96099141
  end
end
|
27
|
+
|
28
|
+
describe '#digest_with_x_bytes' do
  # The dynamic implementation must agree with each hardcoded digest method
  # when asked for the matching number of bytes.
  it 'should equal #digest8 with 1 byte' do
    PearsonHashing.digest8('foo').should == \
      PearsonHashing.digest_with_x_bytes('foo', 1)
  end
  it 'should equal #digest16 with 2 byte' do
    PearsonHashing.digest16('foo').should == \
      PearsonHashing.digest_with_x_bytes('foo', 2)
  end
  it 'should equal #digest32 with 3 byte' do
    PearsonHashing.digest32('foo').should == \
      PearsonHashing.digest_with_x_bytes('foo', 3)
  end
end
|
42
|
+
|
21
43
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pearson-hashing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-18 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: pearson hashing provides "fast hashing of variable-length text strings
|
15
15
|
email:
|