bloomfilter 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +2 -4
- data/Manifest.txt +3 -0
- data/lib/bloomfilter/bloomfilter.rb +87 -0
- data/lib/bloomfilter/external_bloom_filter.rb +71 -0
- data/lib/bloomfilter/version.rb +1 -1
- data/website/index.html +1 -1
- metadata +4 -1
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -5,9 +5,12 @@ README.txt
|
|
5
5
|
Rakefile
|
6
6
|
lib/bloomfilter.rb
|
7
7
|
lib/bloomfilter/version.rb
|
8
|
+
lib/bloomfilter/bloomfilter.rb
|
9
|
+
lib/bloomfilter/external_bloom_filter.rb
|
8
10
|
scripts/txt2html
|
9
11
|
setup.rb
|
10
12
|
test/test_bloomfilter.rb
|
13
|
+
test/test_external_bloom_filter.rb
|
11
14
|
test/test_helper.rb
|
12
15
|
website/index.html
|
13
16
|
website/index.txt
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
require "rubygems"
|
3
|
+
require "inline"
|
4
|
+
|
5
|
+
# Fixed-size bit field stored in a Ruby String, one bit per position.
#
# Bit +position+ lives in byte <tt>position / 8</tt> of the backing string, at
# bit <tt>position % 8</tt> counting from the least significant bit. The actual
# bit manipulation is delegated to two small C functions compiled through
# RubyInline (+set_c+ and +get_c+).
class FasterBitField
  # Number of bits this field was created with.
  attr_reader :size

  # Bits stored per element (byte) of the backing string.
  ELEMENT_WIDTH = 8

  # size  - number of bits in the field.
  # field - optional existing backing string (e.g. read back from disk). When
  #         omitted, a zero-filled string large enough for +size+ bits is
  #         allocated. NOTE: a supplied string is used as-is and is modified in
  #         place by #set.
  def initialize(size, field = nil)
    @size = size
    @field = field || "\000" * (((size - 1) / ELEMENT_WIDTH) + 1)

    # The C helpers index the raw buffer without any checks, so remember how
    # many bits are really addressable: never more than the declared size and
    # never more than the backing string can hold.
    byte_count = @field.respond_to?(:bytesize) ? @field.bytesize : @field.size
    @addressable_bits = [@size, byte_count * ELEMENT_WIDTH].min
  end

  # Turn on the bit at +position+.
  #
  # Raises IndexError when +position+ is outside the field; without this check
  # the C helper would write outside the string's buffer.
  #
  # Pure-Ruby equivalent of the C call (Ruby 1.8 semantics):
  #   @field[position / ELEMENT_WIDTH] |= (1 << (position % ELEMENT_WIDTH))
  def set(position)
    check_bounds(position)
    set_c(@field, position)
  end

  # Read a bit. Returns true when the bit at +position+ is set, false otherwise.
  #
  # Raises IndexError when +position+ is outside the field.
  #
  # Pure-Ruby equivalent of the C call (Ruby 1.8 semantics):
  #   @field[position / ELEMENT_WIDTH] & (1 << (position % ELEMENT_WIDTH)) > 0
  def [](position)
    check_bounds(position)
    get_c(@field, position)
  end

  inline do |builder|
    builder.c <<-code
      void set_c(char *bits, int position) {
        bits[position / 8] |= (1 << (position % 8) );
      }
    code

    builder.c <<-code
      VALUE get_c(char *bits, int position) {
        int flag = bits[position / 8] & (1 << ( position % 8 ));
        if( flag > 0){
          return Qtrue;
        }
        else
          return Qfalse;
      }
    code
  end

  # Returns the backing string itself (not a copy), suitable for writing to disk.
  def to_s
    @field
  end

  private

  # Guard for the unchecked C helpers: reject positions that fall outside the
  # addressable part of the backing string.
  def check_bounds(position)
    return if position >= 0 && position < @addressable_bits

    raise IndexError, "bit position #{position} out of range (0...#{@addressable_bits})"
  end
end
|
49
|
+
|
50
|
+
# In-memory Bloom filter backed by a FasterBitField.
#
# Every item is hashed with SHA1; the 160-bit digest is cut into five 32-bit
# integers and each one (modulo the number of bits) selects a bit in the field.
class BloomFilter
  # num_bits - number of bits in the filter.
  # field    - optional raw bit string (as produced by #save) to start from;
  #            when nil a zeroed field is created.
  def initialize(num_bits, field = nil)
    @bits = FasterBitField.new(num_bits, field)
    @num_bits = num_bits
  end

  # Returns the five 32-bit integers obtained by splitting the SHA1 hex digest
  # of +item+ (a String) into 8-hex-digit chunks.
  def hash_value(item)
    Digest::SHA1.hexdigest(item).scan(/.{8}/).map { |chunk| chunk.to_i(16) }
  end

  # Adds +item+ to the filter by setting each of its hash-selected bits.
  def add(item)
    hash_value(item).each { |bit_position| @bits.set(bit_position % @num_bits) }
  end

  # Returns true when every bit selected by +item+ is set (the item was
  # probably added), false when at least one is clear (the item was
  # definitely not added).
  def include?(item)
    hash_value(item).all? { |bit_position| @bits[bit_position % @num_bits] }
  end

  # Writes the raw bit string to +path+. Binary mode keeps the bytes untouched
  # on platforms that translate line endings in text mode.
  def save(path)
    File.open(path, "wb") do |file|
      file.write @bits.to_s
    end
  end

  # Rebuilds a filter from a file written by #save.
  #
  # Only the raw bytes are stored, so the bit count is reconstructed as
  # bytes * 8. Passing the byte count itself as the bit count would make the
  # loaded filter use a different modulus than the saved one and break
  # membership tests. A filter whose num_bits was not a multiple of 8 still
  # cannot round-trip exactly, because the original bit count is not stored.
  def self.load(path)
    field = File.open(path, "rb") { |file| file.read }
    BloomFilter.new(field.size * FasterBitField::ELEMENT_WIDTH, field)
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
|
3
|
+
# Bit field stored in a file instead of in memory.
#
# Bit +position+ lives in byte <tt>position / 8</tt> of the file, at bit
# <tt>position % 8</tt> counting from the least significant bit. Every read or
# write seeks to the relevant byte of the open file handle.
class ExternalBitField
  # Number of addressable bits (file size in bytes * 8).
  attr_reader :size

  # Bits stored per byte of the file.
  ELEMENT_WIDTH = 8

  # Creates (or truncates) the file at +path+, fills it with enough zero bytes
  # to hold +size+ bits, and returns an ExternalBitField opened on it.
  def self.create(path, size)
    # Binary mode: the content is raw bytes, not text.
    File.open(path, "wb") do |file|
      file.write "\000" * (((size - 1) / ELEMENT_WIDTH) + 1)
    end
    ExternalBitField.new(path)
  end

  # Opens the existing file at +path+ for reading and writing. The file handle
  # stays open until #close is called.
  def initialize(path)
    @size = File.size(path) * ELEMENT_WIDTH
    @field = File.new(path, "r+b")
  end

  # Turns on the bit at +position+.
  #
  # Raises IndexError when +position+ is outside the field.
  def set(position)
    offset = byte_offset(position)

    # read the old value as an integer
    old_val = read_byte(offset)

    # seek back to our spot (the read advanced the file position)
    @field.seek(offset)

    # write the byte back with our bit switched on
    @field.write([old_val | bit_mask(position)].pack("C"))
  end

  # Read a bit. Returns true when the bit at +position+ is set, false otherwise.
  #
  # Raises IndexError when +position+ is outside the field.
  def [](position)
    (read_byte(byte_offset(position)) & bit_mask(position)) != 0
  end

  # Flushes pending writes and closes the underlying file handle. Safe to call
  # more than once.
  def close
    @field.close unless @field.closed?
  end

  private

  # Byte index in the file that holds +position+; validates the range first so
  # callers get an IndexError instead of a failed seek or a nil read.
  def byte_offset(position)
    unless position >= 0 && position < @size
      raise IndexError, "bit position #{position} out of range (0...#{@size})"
    end

    position / ELEMENT_WIDTH
  end

  # Mask selecting the bit for +position+ inside its byte.
  def bit_mask(position)
    1 << (position % ELEMENT_WIDTH)
  end

  # Reads the byte at +offset+ and returns it as an Integer (0..255).
  # unpack("C") is used instead of String#[] because indexing a string yields
  # an Integer only on Ruby 1.8; on later versions it yields a String.
  def read_byte(offset)
    @field.seek(offset)
    byte = @field.read(1)
    raise IndexError, "byte offset #{offset} is past the end of the file" if byte.nil?

    byte.unpack("C").first
  end
end
|
48
|
+
|
49
|
+
# Bloom filter whose bits are kept in a file through ExternalBitField.
#
# Each item is hashed with SHA1 and the digest is read as five 32-bit
# integers; every integer, taken modulo the field size, names one bit.
class ExternalBloomFilter
  # Creates the backing file at +path+ sized for +size+ bits, then opens a
  # filter on it.
  def self.create(path, size)
    ExternalBitField.create(path, size)
    ExternalBloomFilter.new(path)
  end

  # Opens a filter on the existing bit file at +path+.
  def initialize(path)
    @bits = ExternalBitField.new(path)
  end

  # Returns the SHA1 digest of +item+ as an array of five unsigned 32-bit
  # integers, most significant word first.
  def hash_value(item)
    raw_digest = Digest::SHA1.digest(item)
    raw_digest.unpack("N*")
  end

  # Marks +item+ as present by setting each of its hash-selected bits.
  def add(item)
    hash_value(item).each do |word|
      @bits.set(word % @bits.size)
    end
  end

  # True when all bits selected by +item+ are set, false as soon as one of
  # them is found clear.
  def include?(item)
    hash_value(item).all? do |word|
      @bits[word % @bits.size]
    end
  end
end
|
data/lib/bloomfilter/version.rb
CHANGED
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>bloomfilter</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/bloomfilter"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/bloomfilter" class="numbers">0.1.0</a>
|
36
|
+
<a href="http://rubyforge.org/projects/bloomfilter" class="numbers">0.1.1</a>
|
37
37
|
</div>
|
38
38
|
<h1>→ ‘bloomfilter’</h1>
|
39
39
|
|
metadata
CHANGED
@@ -3,7 +3,7 @@ rubygems_version: 0.9.4
|
|
3
3
|
specification_version: 1
|
4
4
|
name: bloomfilter
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
6
|
+
version: 0.1.1
|
7
7
|
date: 2007-09-05 00:00:00 -07:00
|
8
8
|
summary: Two Bloom filter implementations.
|
9
9
|
require_paths:
|
@@ -36,9 +36,12 @@ files:
|
|
36
36
|
- Rakefile
|
37
37
|
- lib/bloomfilter.rb
|
38
38
|
- lib/bloomfilter/version.rb
|
39
|
+
- lib/bloomfilter/bloomfilter.rb
|
40
|
+
- lib/bloomfilter/external_bloom_filter.rb
|
39
41
|
- scripts/txt2html
|
40
42
|
- setup.rb
|
41
43
|
- test/test_bloomfilter.rb
|
44
|
+
- test/test_external_bloom_filter.rb
|
42
45
|
- test/test_helper.rb
|
43
46
|
- website/index.html
|
44
47
|
- website/index.txt
|