pearson-hashing 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
 
3
3
  require File.expand_path('../../lib/pearson-hashing', __FILE__)
4
4
 
5
- SIZE = 100_000
5
+ SIZE = 512**2
6
6
 
7
7
  def benchmark(name, &block)
8
8
  puts "#{name}.."
@@ -21,11 +21,15 @@ end
21
21
  benchmark 'PearsonHashing#digest8' do |str|
22
22
  PearsonHashing.digest8 str
23
23
  end
24
-
24
+
25
25
  benchmark 'PearsonHashing#digest16' do |str|
26
26
  PearsonHashing.digest16 str
27
27
  end
28
28
 
29
+ benchmark 'PearsonHashing#digest32' do |str|
30
+ PearsonHashing.digest32 str
31
+ end
32
+
29
33
  benchmark 'String#hash' do |str|
30
34
  str.hash
31
35
  end
@@ -3,21 +3,29 @@
3
3
  require 'benchmark'
4
4
  require File.expand_path('../../lib/pearson-hashing', __FILE__)
5
5
 
6
+ SIZE = 512**2
7
+
6
8
  Benchmark.bm do |b|
7
9
  b.report 'PearsonHashing#digest8' do
8
- 100_000.times do |i|
10
+ SIZE.times do |i|
9
11
  PearsonHashing.digest8 i.to_s
10
12
  end
11
13
  end
12
14
 
13
15
  b.report 'PearsonHashing#digest16' do
14
- 100_000.times do |i|
16
+ SIZE.times do |i|
15
17
  PearsonHashing.digest16 i.to_s
16
18
  end
17
19
  end
18
20
 
21
+ b.report 'PearsonHashing#digest32' do
22
+ SIZE.times do |i|
23
+ PearsonHashing.digest32 i.to_s
24
+ end
25
+ end
26
+
19
27
  b.report 'String#hash' do
20
- 100_000.times do |i|
28
+ SIZE.times do |i|
21
29
  i.to_s.hash
22
30
  end
23
31
  end
@@ -1,6 +1,5 @@
1
1
  require File.join(File.dirname(__FILE__), 'pearson-hashing/version')
2
2
 
3
-
4
3
  module PearsonHashing
5
4
  TABLE = [49, 118, 63, 252, 13, 155, 114, 130, 137, 40, 210, 62, 219, 246, 136, 221,
6
5
  174, 106, 37, 227, 166, 25, 139, 19, 204, 212, 64, 176, 70, 11, 170, 58,
@@ -36,6 +35,8 @@ module PearsonHashing
36
35
  end
37
36
  hash
38
37
  end
38
+
39
+ # make #digest8 the default
39
40
  class << self
40
41
  alias :digest :digest8
41
42
  end
@@ -47,9 +48,48 @@ module PearsonHashing
47
48
  # @param [String] string
48
49
  # @return [Fixnum] hash
49
50
  def self.digest16(string)
50
- h1 = PearsonHashing.digest(string)
51
- string2 = [((string.bytes.first+1)%256)].pack('U*') + string[1,string.size]
52
- h2 = PearsonHashing.digest(string2)
53
- ("%03d" % h1 + "%03d" % h2).to_i
51
+ h1 = PearsonHashing.digest8 string
52
+ h2 = PearsonHashing.digest8 shift string
53
+ format = '%03d'
54
+ (format % h1 + format % h2).to_i
55
+ end
56
+
57
+ # 32 bit hash (three digest8 values, each zero-padded to 3 decimal digits, concatenated)
58
+ # @param [String] string
59
+ # @return [Fixnum] hashvalue
60
+ def self.digest32(string)
61
+ h1 = PearsonHashing.digest8 string
62
+ h2 = PearsonHashing.digest8 shift string
63
+ h3 = PearsonHashing.digest8 shift shift string
64
+ format = '%03d'
65
+ (format % h1 + format % h2 + format % h3).to_i
66
+ end
67
+
68
+ # this is a dynamic implementation just to enable longer hashes
69
+ # but known to be slower than the hardcoded methods above
70
+ #
71
+ # 1 byte = 8 bit
72
+ # 2 bytes = 16 bit
73
+ # ...
74
+ #
75
+ # @param [String] string
76
+ # @param [Fixnum] no_of_bytes number of digest8 values to concatenate
77
+ # @return [Fixnum] hashvalue
78
+ def self.digest_with_x_bytes(string, no_of_bytes)
79
+ tmp_str = string.dup
80
+ result = ''
81
+ no_of_bytes.times do
82
+ result = result + "%03d" % PearsonHashing.digest8(tmp_str)
83
+ tmp_str = shift(tmp_str)
84
+ end
85
+ result.to_i
86
+ end
87
+
88
+ private
89
+
90
+ # @param [String] str
91
+ # @return [String] str with its first byte incremented by one (modulo 256)
92
+ def self.shift(str)
93
+ [(str.bytes.first+1)%256].pack('U*') + str[1,str.size]
54
94
  end
55
95
  end
@@ -1,3 +1,3 @@
1
1
  module PearsonHashing
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
@@ -1,6 +1,12 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe PearsonHashing do
4
+ describe '#digest' do
5
+ it 'should return the same as #digest8' do
6
+ PearsonHashing.digest('foo').should == PearsonHashing.digest8('foo')
7
+ end
8
+ end
9
+
4
10
  describe '#digest8' do
5
11
  it 'should return 96 for "foo"' do
6
12
  PearsonHashing.digest('foo').should == 96
@@ -13,9 +19,25 @@ describe PearsonHashing do
13
19
  end
14
20
  end
15
21
 
16
- describe '#digest' do
17
- it 'should return the same as #digest8' do
18
- PearsonHashing.digest('foo').should == PearsonHashing.digest8('foo')
22
+ describe '#digest32' do
23
+ it 'should return 96 for "foo"' do
24
+ PearsonHashing.digest32('foo').should == 96099141
25
+ end
26
+ end
27
+
28
+ describe '#digest_with_x_bytes' do
29
+ it 'should equal #digest8 with 1 byte' do
30
+ PearsonHashing.digest8('foo').should == \
31
+ PearsonHashing.digest_with_x_bytes('foo', 1)
32
+ end
33
+ it 'should equal #digest16 with 2 byte' do
34
+ PearsonHashing.digest16('foo').should == \
35
+ PearsonHashing.digest_with_x_bytes('foo', 2)
36
+ end
37
+ it 'should equal #digest21 with 3 byte' do
38
+ PearsonHashing.digest32('foo').should == \
39
+ PearsonHashing.digest_with_x_bytes('foo', 3)
19
40
  end
20
41
  end
42
+
21
43
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pearson-hashing
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-17 00:00:00.000000000Z
12
+ date: 2012-01-18 00:00:00.000000000Z
13
13
  dependencies: []
14
14
  description: pearson hashing provides "fast hashing of variable-length text strings
15
15
  email: