pearson-hashing 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,7 +2,7 @@
2
2
 
3
3
  require File.expand_path('../../lib/pearson-hashing', __FILE__)
4
4
 
5
- SIZE = 100_000
5
+ SIZE = 512**2
6
6
 
7
7
  def benchmark(name, &block)
8
8
  puts "#{name}.."
@@ -21,11 +21,15 @@ end
21
21
  benchmark 'PearsonHashing#digest8' do |str|
22
22
  PearsonHashing.digest8 str
23
23
  end
24
-
24
+
25
25
  benchmark 'PearsonHashing#digest16' do |str|
26
26
  PearsonHashing.digest16 str
27
27
  end
28
28
 
29
+ benchmark 'PearsonHashing#digest32' do |str|
30
+ PearsonHashing.digest32 str
31
+ end
32
+
29
33
  benchmark 'String#hash' do |str|
30
34
  str.hash
31
35
  end
@@ -3,21 +3,29 @@
3
3
  require 'benchmark'
4
4
  require File.expand_path('../../lib/pearson-hashing', __FILE__)
5
5
 
6
+ SIZE = 512**2
7
+
6
8
  Benchmark.bm do |b|
7
9
  b.report 'PearsonHashing#digest8' do
8
- 100_000.times do |i|
10
+ SIZE.times do |i|
9
11
  PearsonHashing.digest8 i.to_s
10
12
  end
11
13
  end
12
14
 
13
15
  b.report 'PearsonHashing#digest16' do
14
- 100_000.times do |i|
16
+ SIZE.times do |i|
15
17
  PearsonHashing.digest16 i.to_s
16
18
  end
17
19
  end
18
20
 
21
+ b.report 'PearsonHashing#digest32' do
22
+ SIZE.times do |i|
23
+ PearsonHashing.digest32 i.to_s
24
+ end
25
+ end
26
+
19
27
  b.report 'String#hash' do
20
- 100_000.times do |i|
28
+ SIZE.times do |i|
21
29
  i.to_s.hash
22
30
  end
23
31
  end
@@ -1,6 +1,5 @@
1
1
  require File.join(File.dirname(__FILE__), 'pearson-hashing/version')
2
2
 
3
-
4
3
  module PearsonHashing
5
4
  TABLE = [49, 118, 63, 252, 13, 155, 114, 130, 137, 40, 210, 62, 219, 246, 136, 221,
6
5
  174, 106, 37, 227, 166, 25, 139, 19, 204, 212, 64, 176, 70, 11, 170, 58,
@@ -36,6 +35,8 @@ module PearsonHashing
36
35
  end
37
36
  hash
38
37
  end
38
+
39
+ # make #digest8 the default
39
40
  class << self
40
41
  alias :digest :digest8
41
42
  end
@@ -47,9 +48,48 @@ module PearsonHashing
47
48
  # @param [String] string
48
49
  # @return [Fixnum] hash
49
50
  def self.digest16(string)
50
- h1 = PearsonHashing.digest(string)
51
- string2 = [((string.bytes.first+1)%256)].pack('U*') + string[1,string.size]
52
- h2 = PearsonHashing.digest(string2)
53
- ("%03d" % h1 + "%03d" % h2).to_i
51
+ h1 = PearsonHashing.digest8 string
52
+ h2 = PearsonHashing.digest8 shift string
53
+ format = '%03d'
54
+ (format % h1 + format % h2).to_i
55
+ end
56
+
57
+ # 32 bit hash
58
+ # @param [String] string
59
+ # @return [Fixnum] hashvalue
60
+ def self.digest32(string)
61
+ h1 = PearsonHashing.digest8 string
62
+ h2 = PearsonHashing.digest8 shift string
63
+ h3 = PearsonHashing.digest8 shift shift string
64
+ format = '%03d'
65
+ (format % h1 + format % h2 + format % h3).to_i
66
+ end
67
+
68
+ # this is a dynamic implementation just to enable longer hashes
69
+ # but known to be slower than the hardcoded methods above
70
+ #
71
+ # 1 byte = 8 bit
72
+ # 2 bytes = 16 bit
73
+ # ...
74
+ #
75
+ # @param [String] string
76
+ # @param [Fixnum] no_of_bytes number of #digest8 values to concatenate
77
+ # @return [Fixnum] hashvalue
78
+ def self.digest_with_x_bytes(string, no_of_bytes)
79
+ tmp_str = string.dup
80
+ result = ''
81
+ no_of_bytes.times do
82
+ result = result + "%03d" % PearsonHashing.digest8(tmp_str)
83
+ tmp_str = shift(tmp_str)
84
+ end
85
+ result.to_i
86
+ end
87
+
88
+ private
89
+
90
+ # @param [String] str
91
+ # @return [String] str with its first byte incremented by one (modulo 256)
92
+ def self.shift(str)
93
+ [(str.bytes.first+1)%256].pack('U*') + str[1,str.size]
54
94
  end
55
95
  end
@@ -1,3 +1,3 @@
1
1
  module PearsonHashing
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -1,6 +1,12 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe PearsonHashing do
4
+ describe '#digest' do
5
+ it 'should return the same as #digest8' do
6
+ PearsonHashing.digest('foo').should == PearsonHashing.digest8('foo')
7
+ end
8
+ end
9
+
4
10
  describe '#digest8' do
5
11
  it 'should return 96 for "foo"' do
6
12
  PearsonHashing.digest('foo').should == 96
@@ -13,9 +19,25 @@ describe PearsonHashing do
13
19
  end
14
20
  end
15
21
 
16
- describe '#digest' do
17
- it 'should return the same as #digest8' do
18
- PearsonHashing.digest('foo').should == PearsonHashing.digest8('foo')
22
+ describe '#digest32' do
23
+ it 'should return 96 for "foo"' do
24
+ PearsonHashing.digest32('foo').should == 96099141
25
+ end
26
+ end
27
+
28
+ describe '#digest_with_x_bytes' do
29
+ it 'should equal #digest8 with 1 byte' do
30
+ PearsonHashing.digest8('foo').should == \
31
+ PearsonHashing.digest_with_x_bytes('foo', 1)
32
+ end
33
+ it 'should equal #digest16 with 2 byte' do
34
+ PearsonHashing.digest16('foo').should == \
35
+ PearsonHashing.digest_with_x_bytes('foo', 2)
36
+ end
37
+ it 'should equal #digest21 with 3 byte' do
38
+ PearsonHashing.digest32('foo').should == \
39
+ PearsonHashing.digest_with_x_bytes('foo', 3)
19
40
  end
20
41
  end
42
+
21
43
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pearson-hashing
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-17 00:00:00.000000000Z
12
+ date: 2012-01-18 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: pearson hashing provides "fast hashing of variable-length text strings
15
15
  email: