bloomfilter 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,2 @@
1
- == 0.0.1 2007-09-05
2
-
3
- * 1 major enhancement:
4
- * Initial release
1
+ == 0.1.1 2007-09-05
2
+ Some files were missing from the gem manifest.
@@ -5,9 +5,12 @@ README.txt
5
5
  Rakefile
6
6
  lib/bloomfilter.rb
7
7
  lib/bloomfilter/version.rb
8
+ lib/bloomfilter/bloomfilter.rb
9
+ lib/bloomfilter/external_bloom_filter.rb
8
10
  scripts/txt2html
9
11
  setup.rb
10
12
  test/test_bloomfilter.rb
13
+ test/test_external_bloom_filter.rb
11
14
  test/test_helper.rb
12
15
  website/index.html
13
16
  website/index.txt
@@ -0,0 +1,87 @@
1
+ require 'digest/sha1'
2
+ require "rubygems"
3
+ require "inline"
4
+
5
# Fixed-size bit field backed by a Ruby String (eight bits per byte).
#
# The individual bit operations are implemented in C via RubyInline and work
# directly on the string's memory, so every position is validated in Ruby
# before it is handed to the C code.
class FasterBitField
  attr_reader :size

  # Number of bits stored in each byte of the backing string.
  ELEMENT_WIDTH = 8

  # size  - number of addressable bits.
  # field - optional existing backing string; a zero-filled string is
  #         allocated when it is omitted.
  #
  # Raises ArgumentError when +field+ is too short to hold +size+ bits,
  # because the C helpers would otherwise touch memory past its end.
  def initialize(size, field = nil)
    bytes_needed = ((size - 1) / ELEMENT_WIDTH) + 1
    if field && field.bytesize < bytes_needed
      raise ArgumentError,
            "field holds #{field.bytesize} bytes, #{bytes_needed} needed for #{size} bits"
    end

    @size = size
    @field = field || "\000" * bytes_needed
  end

  # Turn on the bit at +position+.
  #
  # Raises IndexError when +position+ is outside 0...size.
  def set(position)
    check_bounds(position)
    # Pure-Ruby equivalent:
    #   @field[position / ELEMENT_WIDTH] |= (1 << (position % ELEMENT_WIDTH))
    set_c(@field, position)
  end

  # Read the bit at +position+; returns true when it is set, false otherwise.
  #
  # Raises IndexError when +position+ is outside 0...size.
  def [](position)
    check_bounds(position)
    # Pure-Ruby equivalent:
    #   @field[position / ELEMENT_WIDTH] & (1 << (position % ELEMENT_WIDTH)) > 0
    get_c(@field, position)
  end

  inline do |builder|
    builder.c <<-code
      void set_c(char *bits, int position) {
        bits[position / 8] |= (1 << (position % 8) );
      }
    code

    builder.c <<-code
      VALUE get_c(char *bits, int position) {
        int flag = bits[position / 8] & (1 << ( position % 8 ));
        if( flag > 0){
          return Qtrue;
        }
        else
          return Qfalse;
      }
    code
  end

  # Returns the backing string itself (not a copy).
  def to_s
    @field
  end

  private

  # The C helpers index raw memory without any checking, so reject
  # positions that fall outside the field before calling them.
  def check_bounds(position)
    return if position >= 0 && position < @size

    raise IndexError, "bit position #{position} out of range (0...#{@size})"
  end
end
49
+
50
# In-memory Bloom filter backed by a FasterBitField.
#
# Each item is hashed with SHA1 and the digest is split into five 32-bit
# integers; each integer, taken modulo the number of bits, selects one bit.
class BloomFilter
  # Bits stored per byte of a saved filter file.
  BITS_PER_BYTE = 8

  # num_bits - number of bits in the filter.
  # field    - optional existing backing string, e.g. one read from a file
  #            written by #save.
  def initialize(num_bits, field = nil)
    @bits = FasterBitField.new(num_bits, field)
    @num_bits = num_bits
  end

  # Returns the integers obtained by cutting the 40-character SHA1 hex
  # digest of +item+ into 8-character chunks.
  def hash_value(item)
    Digest::SHA1.hexdigest(item).scan(/.{8}/).map { |chunk| chunk.to_i(16) }
  end

  # Record +item+ in the filter.
  def add(item)
    hash_value(item).each { |bit_position| @bits.set(bit_position % @num_bits) }
  end

  # Returns false when +item+ was definitely never added and true when it
  # may have been (Bloom filters can report false positives).
  def include?(item)
    hash_value(item).all? { |bit_position| @bits[bit_position % @num_bits] }
  end

  # Write the raw bit field to +path+.
  def save(path)
    # Binary mode: the field is raw bytes and must not go through newline
    # or encoding translation.
    File.open(path, "wb") do |file|
      file.write @bits.to_s
    end
  end

  # Rebuild a filter from a file written by #save.
  #
  # num_bits - bit count of the filter that was saved. The file does not
  #            record it, so it defaults to the file's full capacity
  #            (bytes * 8); pass it explicitly when the original bit count
  #            was not a multiple of 8.
  def self.load(path, num_bits = nil)
    field = File.open(path, "rb") { |file| file.read }
    new(num_bits || field.bytesize * BITS_PER_BYTE, field)
  end
end
@@ -0,0 +1,71 @@
1
+ require 'digest/sha1'
2
+
3
# Bit field stored in a file on disk instead of in memory.
#
# Every read or write seeks to the byte that holds the requested bit, so
# only one byte is touched per operation.
class ExternalBitField
  attr_reader :size

  # Number of bits stored in each byte of the file.
  ELEMENT_WIDTH = 8

  # Create (or overwrite) a zero-filled file at +path+ large enough for
  # +size+ bits and return a bit field opened on it.
  def self.create(path, size)
    # Binary mode: the file holds raw bytes, not text.
    File.open(path, "wb") do |file|
      file.write("\000" * (((size - 1) / ELEMENT_WIDTH) + 1))
    end
    new(path)
  end

  # Open an existing bit field file. The size is taken from the file
  # length, so it is always a multiple of ELEMENT_WIDTH.
  def initialize(path)
    @size = File.size(path) * ELEMENT_WIDTH
    @field = File.new(path, "r+b")
  end

  # Turn on the bit at +position+.
  #
  # Raises IndexError when +position+ is outside 0...size.
  def set(position)
    offset = byte_offset(position)
    current = read_byte(offset)

    # Reading advanced the file pointer; go back before overwriting.
    @field.seek(offset)
    @field.write([current | (1 << (position % ELEMENT_WIDTH))].pack("C"))
  end

  # Read the bit at +position+; returns true when it is set, false otherwise.
  #
  # Raises IndexError when +position+ is outside 0...size.
  def [](position)
    (read_byte(byte_offset(position)) & (1 << (position % ELEMENT_WIDTH))) > 0
  end

  # Flush pending writes and release the underlying file handle.
  # Calling it more than once is harmless.
  def close
    @field.close unless @field.closed?
  end

  private

  # Index of the byte that holds +position+, after validating the position
  # (reading past the end of the file would otherwise yield nil).
  def byte_offset(position)
    unless position >= 0 && position < @size
      raise IndexError, "bit position #{position} out of range (0...#{@size})"
    end

    position / ELEMENT_WIDTH
  end

  # Value (0..255) of the byte at +offset+. IO#read returns a String, so it
  # is decoded explicitly instead of relying on String#[] giving an Integer.
  def read_byte(offset)
    @field.seek(offset)
    @field.read(1).unpack("C").first
  end
end
48
+
49
# Bloom filter whose bits live in a file on disk (see ExternalBitField).
#
# Each item is hashed with SHA1 and the digest is split into five 32-bit
# integers; each integer, taken modulo the bit field size, selects one bit.
class ExternalBloomFilter
  # Create a new zero-filled filter file at +path+ holding +size+ bits and
  # return a filter using it.
  def self.create(path, size)
    # Reuse the bit field that create returns rather than opening the file
    # a second time and abandoning the first open handle.
    new(path, ExternalBitField.create(path, size))
  end

  # path - location of an existing filter file.
  # bits - optional bit field already opened on +path+; when omitted, a new
  #        ExternalBitField is opened on +path+.
  def initialize(path, bits = nil)
    @bits = bits || ExternalBitField.new(path)
  end

  # Returns the integers obtained by cutting the 40-character SHA1 hex
  # digest of +item+ into 8-character chunks.
  def hash_value(item)
    Digest::SHA1.hexdigest(item).scan(/.{8}/).map { |chunk| chunk.to_i(16) }
  end

  # Record +item+ in the filter.
  def add(item)
    hash_value(item).each { |bit_position| @bits.set(bit_position % @bits.size) }
  end

  # Returns false when +item+ was definitely never added and true when it
  # may have been (Bloom filters can report false positives).
  def include?(item)
    hash_value(item).all? { |bit_position| @bits[bit_position % @bits.size] }
  end
end
@@ -2,7 +2,7 @@ module Bloomfilter #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -33,7 +33,7 @@
33
33
  <h1>bloomfilter</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/bloomfilter"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/bloomfilter" class="numbers">0.1.0</a>
36
+ <a href="http://rubyforge.org/projects/bloomfilter" class="numbers">0.1.1</a>
37
37
  </div>
38
38
  <h1>&#x2192; &#8216;bloomfilter&#8217;</h1>
39
39
 
metadata CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: bloomfilter
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.0
6
+ version: 0.1.1
7
7
  date: 2007-09-05 00:00:00 -07:00
8
8
  summary: Two Bloom filter implementations.
9
9
  require_paths:
@@ -36,9 +36,12 @@ files:
36
36
  - Rakefile
37
37
  - lib/bloomfilter.rb
38
38
  - lib/bloomfilter/version.rb
39
+ - lib/bloomfilter/bloomfilter.rb
40
+ - lib/bloomfilter/external_bloom_filter.rb
39
41
  - scripts/txt2html
40
42
  - setup.rb
41
43
  - test/test_bloomfilter.rb
44
+ - test/test_external_bloom_filter.rb
42
45
  - test/test_helper.rb
43
46
  - website/index.html
44
47
  - website/index.txt