pearson-hashing 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/benchmark/collisions.rb +6 -2
- data/benchmark/speed.rb +11 -3
- data/lib/pearson-hashing.rb +45 -5
- data/lib/pearson-hashing/version.rb +1 -1
- data/spec/lib/pearson_hashing_spec.rb +25 -3
- metadata +2 -2
data/benchmark/collisions.rb
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
require File.expand_path('../../lib/pearson-hashing', __FILE__)
|
4
4
|
|
5
|
-
SIZE =
|
5
|
+
SIZE = 512**2
|
6
6
|
|
7
7
|
def benchmark(name, &block)
|
8
8
|
puts "#{name}.."
|
@@ -21,11 +21,15 @@ end
|
|
21
21
|
benchmark 'PearsonHashing#digest8' do |str|
|
22
22
|
PearsonHashing.digest8 str
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
benchmark 'PearsonHashing#digest16' do |str|
|
26
26
|
PearsonHashing.digest16 str
|
27
27
|
end
|
28
28
|
|
29
|
+
benchmark 'PearsonHashing#digest32' do |str|
|
30
|
+
PearsonHashing.digest32 str
|
31
|
+
end
|
32
|
+
|
29
33
|
benchmark 'String#hash' do |str|
|
30
34
|
str.hash
|
31
35
|
end
|
data/benchmark/speed.rb
CHANGED
@@ -3,21 +3,29 @@
|
|
3
3
|
require 'benchmark'
|
4
4
|
require File.expand_path('../../lib/pearson-hashing', __FILE__)
|
5
5
|
|
6
|
+
SIZE = 512**2
|
7
|
+
|
6
8
|
Benchmark.bm do |b|
|
7
9
|
b.report 'PearsonHashing#digest8' do
|
8
|
-
|
10
|
+
SIZE.times do |i|
|
9
11
|
PearsonHashing.digest8 i.to_s
|
10
12
|
end
|
11
13
|
end
|
12
14
|
|
13
15
|
b.report 'PearsonHashing#digest16' do
|
14
|
-
|
16
|
+
SIZE.times do |i|
|
15
17
|
PearsonHashing.digest16 i.to_s
|
16
18
|
end
|
17
19
|
end
|
18
20
|
|
21
|
+
b.report 'PearsonHashing#digest32' do
|
22
|
+
SIZE.times do |i|
|
23
|
+
PearsonHashing.digest32 i.to_s
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
19
27
|
b.report 'String#hash' do
|
20
|
-
|
28
|
+
SIZE.times do |i|
|
21
29
|
i.to_s.hash
|
22
30
|
end
|
23
31
|
end
|
data/lib/pearson-hashing.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'pearson-hashing/version')
|
2
2
|
|
3
|
-
|
4
3
|
module PearsonHashing
|
5
4
|
TABLE = [49, 118, 63, 252, 13, 155, 114, 130, 137, 40, 210, 62, 219, 246, 136, 221,
|
6
5
|
174, 106, 37, 227, 166, 25, 139, 19, 204, 212, 64, 176, 70, 11, 170, 58,
|
@@ -36,6 +35,8 @@ module PearsonHashing
|
|
36
35
|
end
|
37
36
|
hash
|
38
37
|
end
|
38
|
+
|
39
|
+
# make #digest8 the default
|
39
40
|
class << self
|
40
41
|
alias :digest :digest8
|
41
42
|
end
|
@@ -47,9 +48,48 @@ module PearsonHashing
|
|
47
48
|
# @param [String] string
|
48
49
|
# @return [Fixnum] hash
|
49
50
|
def self.digest16(string)
|
50
|
-
h1 = PearsonHashing.
|
51
|
-
|
52
|
-
|
53
|
-
(
|
51
|
+
h1 = PearsonHashing.digest8 string
|
52
|
+
h2 = PearsonHashing.digest8 shift string
|
53
|
+
format = '%03d'
|
54
|
+
(format % h1 + format % h2).to_i
|
55
|
+
end
|
56
|
+
|
57
|
+
# 32 bit hash
|
58
|
+
# @param [String] string
|
59
|
+
# @return [Fixnum] hashvalue
|
60
|
+
def self.digest32(string)
|
61
|
+
h1 = PearsonHashing.digest8 string
|
62
|
+
h2 = PearsonHashing.digest8 shift string
|
63
|
+
h3 = PearsonHashing.digest8 shift shift string
|
64
|
+
format = '%03d'
|
65
|
+
(format % h1 + format % h2 + format % h3).to_i
|
66
|
+
end
|
67
|
+
|
68
|
+
# this is a dynamic implementation just to enable longer hashes
|
69
|
+
# but known to be slower than the hardcoded methods above
|
70
|
+
#
|
71
|
+
# 1 byte = 8 bit
|
72
|
+
# 2 bytes = 16 bit
|
73
|
+
# ...
|
74
|
+
#
|
75
|
+
# @param [String] string
|
76
|
+
# @param [Fixnum] no_of_bytes number of 8-bit digests to concatenate
|
77
|
+
# @return [Fixnum] hashvalue
|
78
|
+
def self.digest_with_x_bytes(string, no_of_bytes)
|
79
|
+
tmp_str = string.dup
|
80
|
+
result = ''
|
81
|
+
no_of_bytes.times do
|
82
|
+
result = result + "%03d" % PearsonHashing.digest8(tmp_str)
|
83
|
+
tmp_str = shift(tmp_str)
|
84
|
+
end
|
85
|
+
result.to_i
|
86
|
+
end
|
87
|
+
|
88
|
+
private
|
89
|
+
|
90
|
+
# @param [String] str
|
91
|
+
# @return [String] str with its first byte incremented by one (modulo 256)
|
92
|
+
def self.shift(str)
|
93
|
+
[(str.bytes.first+1)%256].pack('U*') + str[1,str.size]
|
54
94
|
end
|
55
95
|
end
|
@@ -1,6 +1,12 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe PearsonHashing do
|
4
|
+
describe '#digest' do
|
5
|
+
it 'should return the same as #digest8' do
|
6
|
+
PearsonHashing.digest('foo').should == PearsonHashing.digest8('foo')
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
4
10
|
describe '#digest8' do
|
5
11
|
it 'should return 96 for "foo"' do
|
6
12
|
PearsonHashing.digest('foo').should == 96
|
@@ -13,9 +19,25 @@ describe PearsonHashing do
|
|
13
19
|
end
|
14
20
|
end
|
15
21
|
|
16
|
-
describe '#
|
17
|
-
it 'should return
|
18
|
-
PearsonHashing.
|
22
|
+
describe '#digest32' do
|
23
|
+
it 'should return 96 for "foo"' do
|
24
|
+
PearsonHashing.digest32('foo').should == 96099141
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe '#digest_with_x_bytes' do
|
29
|
+
it 'should equal #digest8 with 1 byte' do
|
30
|
+
PearsonHashing.digest8('foo').should == \
|
31
|
+
PearsonHashing.digest_with_x_bytes('foo', 1)
|
32
|
+
end
|
33
|
+
it 'should equal #digest16 with 2 byte' do
|
34
|
+
PearsonHashing.digest16('foo').should == \
|
35
|
+
PearsonHashing.digest_with_x_bytes('foo', 2)
|
36
|
+
end
|
37
|
+
it 'should equal #digest21 with 3 byte' do
|
38
|
+
PearsonHashing.digest32('foo').should == \
|
39
|
+
PearsonHashing.digest_with_x_bytes('foo', 3)
|
19
40
|
end
|
20
41
|
end
|
42
|
+
|
21
43
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pearson-hashing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
12
|
+
date: 2012-01-18 00:00:00.000000000Z
|
13
13
|
dependencies: []
|
14
14
|
description: pearson hasing provides "fast hashing of variable-length text strings
|
15
15
|
email:
|