bloombroom 1.0.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,8 @@
1
+ module Bloombroom
2
+ class Process # process helper; shares its name with Ruby's core ::Process, hence the explicit ::Process below
3
+
4
+ def self.rss # resident set size of the current process as printed by ps (units are platform-defined, typically KB - TODO confirm)
5
+ `ps -o rss= -p #{::Process.pid}`.to_i
6
+ end # "rss=" gives the column an empty header so only the number is printed; to_i yields 0 if ps prints nothing numeric
7
+ end
8
+ end
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "bloombroom/version"
4
+
5
+ Gem::Specification.new do |s| # gem metadata for bloombroom
6
+ s.name = "bloombroom"
7
+ s.version = Bloombroom::VERSION
8
+ s.authors = ["Colin Surprenant"]
9
+ s.email = ["colin.surprenant@gmail.com"]
10
+ s.homepage = "https://github.com/colinsurprenant/bloombroom"
11
+ s.summary = "bloom filters for bounded and unbounded (streaming) data, fast C/FFI FNV hashing and bit fields"
12
+ s.description = "bloombroom has two bloom filter implementations, a standard filter for bounded key space \
13
+ and a continuous filter for unbounded keys (stream). also contains fast C/FFI FNV hashing and fast bit field and \
14
+ bit bucket field (multi bits)."
15
+
16
+ s.files = `git ls-files`.split($/) # packaged files are whatever git tracks; $/ is the record separator (newline by default)
17
+ s.executables = s.files.grep(%r{^bin/}).map{ |f| File.basename(f) } # basenames of tracked files under bin/
18
+ s.test_files = s.files.grep(%r{^(test|spec|features)/}) # tracked files under test/, spec/ or features/
19
+ s.require_paths = ["lib"]
20
+ s.extensions = ["ffi/bloombroom/hash/Rakefile"] # presumably the ffi-compiler Rakefile that builds ffi_fnv at install time - TODO confirm
21
+
22
+ s.has_rdoc = false # NOTE(review): has_rdoc is deprecated in current RubyGems - confirm before reusing this spec
23
+ s.license = 'Apache 2.0' # NOTE(review): RubyGems expects an SPDX identifier (probably 'Apache-2.0') - confirm
24
+
25
+ s.add_dependency "ffi", ">= 1.0.0"
26
+ s.add_dependency "ffi-compiler" # runtime dependency with no version constraint
27
+ s.add_development_dependency "rspec", ">= 2.8.0"
28
+ end
@@ -0,0 +1,5 @@
1
+ require 'ffi-compiler/compile_task'
2
+
3
+ FFI::Compiler::CompileTask.new('ffi_fnv') do |c| # defines the build task for the native library named 'ffi_fnv'
4
+ # nothing yet: no extra configuration is applied to the task object c
5
+ end
@@ -1,13 +1,17 @@
1
+ require 'ffi'
2
+ require 'ffi-compiler/loader'
3
+
4
+ module Bloombroom
5
+ class FNVFFI # binds the native library here; the attach_function calls appear in a later hunk of this diff
6
+ extend FFI::Library
7
+ ffi_lib FFI::Compiler::Loader.find('ffi_fnv') # looked up by name through ffi-compiler's loader, replacing the hand-built File.dirname path this diff removes
8
+ end
9
+ end
10
+
1
11
  require "bloombroom/version"
2
12
  require "bloombroom/bits/bit_field"
3
13
  require "bloombroom/bits/bit_bucket_field"
4
14
  require "bloombroom/filter/bloom_helper"
5
15
  require "bloombroom/filter/bloom_filter"
6
16
  require "bloombroom/filter/continuous_bloom_filter"
7
- require "bloombroom/hash/fnv_a"
8
- require "bloombroom/hash/fnv_b"
9
- require "bloombroom/hash/cext_fnv"
10
17
  require "bloombroom/hash/ffi_fnv"
11
-
12
- module Bloombroom
13
- end
@@ -2,14 +2,10 @@ require 'ffi'
2
2
 
3
3
  module Bloombroom
4
4
  class FNVFFI
5
- extend FFI::Library
6
-
7
- ffi_lib File.dirname(__FILE__) + "/" + (FFI::Platform.mac? ? "ffi_fnv.bundle" : FFI.map_library_name("ffi_fnv"))
8
-
9
- attach_function :c_fnv1_32, :fnv1_32, [:string, :uint32], :uint32
10
- attach_function :c_fnv1a_32, :fnv1a_32, [:string, :uint32], :uint32
11
- attach_function :c_fnv1_64, :fnv1_64, [:string, :uint32], :uint64
12
- attach_function :c_fnv1a_64, :fnv1a_64, [:string, :uint32], :uint64
5
+ attach_function :c_fnv1_32, :fnv1_32, [:buffer_in, :uint32], :uint32, :save_errno => false
6
+ attach_function :c_fnv1a_32, :fnv1a_32, [:buffer_in, :uint32], :uint32, :save_errno => false
7
+ attach_function :c_fnv1_64, :fnv1_64, [:buffer_in, :uint32], :uint64, :save_errno => false
8
+ attach_function :c_fnv1a_64, :fnv1a_64, [:buffer_in, :uint32], :uint64, :save_errno => false
13
9
 
14
10
  def self.fnv1_32(data)
15
11
  c_fnv1_32(data, data.size)
@@ -1,3 +1,3 @@
1
1
  module Bloombroom
2
- VERSION = "1.0.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -0,0 +1,142 @@
1
+ require 'spec_helper'
2
+ # require 'bloombroom/bits/bit_bucket_field'
3
+
4
+ describe Bloombroom::BitBucketField do
5
+
6
+ it "should be all unset at initialization" do
7
+ bf = Bloombroom::BitBucketField.new(8, 100)
8
+ 100.times.each do |i|
9
+ bf[i].should == 0
10
+ end
11
+ bf.total_set.should == 0
12
+ end
13
+
14
+ it "should set value" do
15
+ bf = Bloombroom::BitBucketField.new(4, 16)
16
+
17
+ 16.times.each do |i|
18
+ bf[0] = i
19
+ bf[2] = i
20
+ bf[4] = i
21
+ bf[6] = i
22
+ bf[8] = i
23
+ bf[0].should == i
24
+ bf[1].should == 0
25
+ bf[2].should == i
26
+ bf[3].should == 0
27
+ bf[4].should == i
28
+ bf[5].should == 0
29
+ bf[6].should == i
30
+ bf[7].should == 0
31
+ bf[8].should == i
32
+ end
33
+
34
+ 16.times.each do |i|
35
+ bf[0] = 0
36
+ bf[1] = i
37
+ bf[2] = 0
38
+ bf[3] = i
39
+ bf[4] = 0
40
+ bf[5] = i
41
+ bf[6] = 0
42
+ bf[7] = i
43
+ bf[8] = 0
44
+ bf[0].should == 0
45
+ bf[1].should == i
46
+ bf[2].should == 0
47
+ bf[3].should == i
48
+ bf[4].should == 0
49
+ bf[5].should == i
50
+ bf[6].should == 0
51
+ bf[7].should == i
52
+ bf[8].should == 0
53
+ end
54
+
55
+ 16.times do |value|
56
+ 16.times.each do |i|
57
+ bf[i] = value
58
+ end
59
+ 16.times.each do |i|
60
+ bf[i].should == value
61
+ end
62
+ end
63
+ end
64
+
65
+ it "should randomly set" do
66
+ bf = Bloombroom::BitBucketField.new(4, 1000)
67
+ random_buckets = Array.new(500) {rand(1000)}.uniq
68
+ random_values = Array.new(random_buckets.size) {rand(16)} # values between 0 and 15
69
+ bucket_value = random_buckets.zip(random_values)
70
+ other_buckets = Array.new(1000) {|i| i} - random_buckets
71
+
72
+ bucket_value.each{|b, v| bf[b] = v}
73
+ other_buckets.each{|i| bf[i].should == 0}
74
+ bucket_value.each{|b, v| bf[b].should == v}
75
+
76
+ other_buckets.each{|i| bf[i] = 0}
77
+ other_buckets.each{|i| bf[i].should == 0}
78
+ bucket_value.each{|b, v| bf[b].should == v}
79
+
80
+ random_buckets.each{|i| bf[i] = 0}
81
+ other_buckets.each{|i| bf[i].should == 0}
82
+ random_buckets.each{|i| bf[i].should == 0}
83
+ end
84
+
85
+ it "should report size" do
86
+ bf = Bloombroom::BitBucketField.new(4, 56)
87
+ bf.size.should == 56
88
+ end
89
+
90
+ it "should report total_set" do
91
+ bf = Bloombroom::BitBucketField.new(16, 100)
92
+ bf[0] = (2 ** 16) - 1
93
+ bf[1] = (2 ** 16) - 1
94
+ bf[2] = (2 ** 16) - 1
95
+ bf[4] = (2 ** 16) - 1
96
+ bf[50] = 1
97
+ bf.total_set.should == 5 # five distinct buckets (0, 1, 2, 4, 50) were given a non-zero value
98
+
99
+ bf = Bloombroom::BitBucketField.new(4, 1000)
100
+ random_buckets = Array.new(500) {rand(1000)}.uniq
101
+ random_values = Array.new(random_buckets.size) {rand(14) + 1} # values between 1 and 14 (rand(14) yields 0..13), so never zero
102
+ bucket_value = random_buckets.zip(random_values)
103
+ other_buckets = Array.new(1000) {|i| i} - random_buckets
104
+
105
+ bucket_value.each{|b, v| bf[b] = v}
106
+ bf.total_set.should == random_buckets.size
107
+
108
+ other_buckets.each{|i| bf[i] = 0} # writing zero to untouched buckets must not change the count
109
+ other_buckets.each{|i| bf[i].should == 0}
110
+ bf.total_set.should == bucket_value.size
111
+
112
+ random_buckets.each{|i| bf[i] = 0} # zeroing every previously set bucket brings the count back to 0
113
+ bf.total_set.should == 0
114
+ end
115
+
116
+ it "should produce bit string using to_s" do
117
+ bf = Bloombroom::BitBucketField.new(4, 1)
118
+ bf[0] = 1
119
+ bf.to_s.should == "0001"
120
+ bf.to_s(10).should == "1"
121
+ bf[0] = 15
122
+ bf.to_s.should == "1111"
123
+ bf.to_s(10).should == "15"
124
+
125
+ bf = Bloombroom::BitBucketField.new(4, 2)
126
+ bf[0] = 3
127
+ bf[1] = 8
128
+ bf.to_s.should == "0011 1000"
129
+ bf.to_s(10).should == "3 8"
130
+
131
+ bf = Bloombroom::BitBucketField.new(4, 8)
132
+ bf[0] = 1
133
+ bf[2] = 2
134
+ bf[4] = 3
135
+ bf[6] = 4
136
+ bf.to_s.should == "0001 0000 0010 0000 0011 0000 0100 0000"
137
+ bf.to_s(10).should == "1 0 2 0 3 0 4 0"
138
+
139
+ lambda{bf.to_s(16)}.should raise_error
140
+ end
141
+
142
+ end
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+ # require 'bloombroom/bits/bit_field'
3
+
4
+ describe Bloombroom::BitField do
5
+
6
+ it "should be all unset at initialization" do
7
+ bf = Bloombroom::BitField.new(100)
8
+ (0..99).each do |i|
9
+ bf[i].should == 0
10
+ bf.include?(i).should be_false
11
+ end
12
+ end
13
+
14
+ it "should set and unset" do
15
+ bf = Bloombroom::BitField.new(1000)
16
+ bf[100].should == 0
17
+ bf.include?(100).should be_false
18
+ bf[101].should == 0
19
+ bf.include?(101).should be_false
20
+
21
+ bf[100] = 1
22
+ bf[100].should == 1
23
+ bf.include?(100).should be_true
24
+ bf[100] = 0
25
+ bf[100].should == 0
26
+ bf.include?(100).should be_false
27
+
28
+ bf.set(101)
29
+ bf[101].should == 1
30
+ bf.include?(101).should be_true
31
+ bf.unset(101)
32
+ bf[101].should == 0
33
+ bf.include?(101).should be_false
34
+ end
35
+
36
+ it "should unset" do
37
+ bf = Bloombroom::BitField.new(100)
38
+ (0..99).each{|i| bf.include?(i).should be_false}
39
+ (0..31).each{|i| bf.unset(i)}
40
+ (0..99).each{|i| bf.include?(i).should be_false}
41
+
42
+ (0..31).each{|i| bf.set(i)}
43
+ (0..31).each{|i| bf.include?(i).should be_true}
44
+ (32..99).each{|i| bf.include?(i).should be_false}
45
+
46
+ unsetbits = [0, 5, 6, 10, 16, 23, 31]
47
+ unsetbits.each{|i| bf.unset(i)}
48
+ ((0..31).map{|i| i} - unsetbits).each{|i| bf.include?(i).should be_true}
49
+ unsetbits.each{|i| bf.include?(i).should be_false}
50
+ (32..99).each{|i| bf.include?(i).should be_false}
51
+ end
52
+
53
+ it "should randomly set and unset" do
54
+ bf = Bloombroom::BitField.new(1000)
55
+ random_bits = (0..250).map{|i| rand(1000)}
56
+ other_bits = (0..999).map{|i| i} - random_bits
57
+
58
+ random_bits.each{|i| bf.set(i)}
59
+ other_bits.each{|i| bf.include?(i).should be_false}
60
+ random_bits.each{|i| bf.include?(i).should be_true}
61
+
62
+ other_bits.each{|i| bf.unset(i)}
63
+ other_bits.each{|i| bf.include?(i).should be_false}
64
+ random_bits.each{|i| bf.include?(i).should be_true}
65
+
66
+ random_bits.each{|i| bf.unset(i)}
67
+ other_bits.each{|i| bf.include?(i).should be_false}
68
+ random_bits.each{|i| bf.include?(i).should be_false}
69
+ end
70
+
71
+ it "should randomly set and unset and support zero?" do
72
+ bf = Bloombroom::BitField.new(1000)
73
+ random_bits = (0..250).map{|i| rand(1000)}
74
+ other_bits = (0..999).map{|i| i} - random_bits
75
+
76
+ random_bits.each{|i| bf.set(i)}
77
+ other_bits.each{|i| bf.zero?(i).should be_true}
78
+ random_bits.each{|i| bf.zero?(i).should be_false}
79
+
80
+ other_bits.each{|i| bf.unset(i)}
81
+ other_bits.each{|i| bf.zero?(i).should be_true}
82
+ random_bits.each{|i| bf.zero?(i).should be_false}
83
+
84
+ random_bits.each{|i| bf.unset(i)}
85
+ other_bits.each{|i| bf.zero?(i).should be_true}
86
+ random_bits.each{|i| bf.zero?(i).should be_true}
87
+ end
88
+
89
+ it "should report size" do
90
+ bf = Bloombroom::BitField.new(456)
91
+ bf.size.should == 456
92
+ end
93
+
94
+ it "should produce bit string using to_s" do
95
+ bf = Bloombroom::BitField.new(10)
96
+ bf[1] = 1
97
+ bf[5] = 1
98
+ bf.to_s.should == "0100010000"
99
+ end
100
+
101
+ it "should report total_set" do
102
+ bf = Bloombroom::BitField.new(10)
103
+ bf[1] = 1
104
+ bf[5] = 1
105
+ bf.total_set.should == 2
106
+ end
107
+
108
+ end
@@ -0,0 +1,43 @@
1
+ require 'spec_helper'
2
+ # require 'bloombroom'
3
+
4
+ describe Bloombroom::BloomFilter do
5
+
6
+ it "should add" do
7
+ bf = Bloombroom::BloomFilter.new(1000, 5)
8
+ bf.include?("abc1").should be_false
9
+ bf.include?("abc2").should be_false
10
+ bf.include?("abc3").should be_false
11
+
12
+ bf.add("abc1")
13
+ bf.include?("abc1").should be_true
14
+ bf.include?("abc2").should be_false
15
+ bf.include?("abc3").should be_false
16
+
17
+ bf.add("abc2")
18
+ bf.include?("abc1").should be_true
19
+ bf.include?("abc2").should be_true
20
+ bf.include?("abc3").should be_false
21
+
22
+ bf.add("abc3")
23
+ bf.include?("abc1").should be_true
24
+ bf.include?("abc2").should be_true
25
+ bf.include?("abc3").should be_true
26
+ end
27
+
28
+ it "should keep track of size" do
29
+ bf = Bloombroom::BloomFilter.new(1000, 5)
30
+ bf.size.should == 0
31
+ bf.add("abc1")
32
+ bf.size.should == 1
33
+ bf.add("abc2")
34
+ bf.size.should == 2
35
+ end
36
+
37
+ it "should find m and k" do
38
+ bf = Bloombroom::BloomFilter.new(*Bloombroom::BloomHelper.find_m_k(10000, 0.001))
39
+ bf.m.should == 143776
40
+ bf.k.should == 10
41
+ end
42
+
43
+ end
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+ # require 'bloombroom/filter/bloom_helper'
3
+
4
+ describe Bloombroom::BloomHelper do
5
+
6
+ it "should multi_hash" do
7
+ h = Bloombroom::BloomHelper.multi_hash("feedfacedeadbeef", 5)
8
+ h.size.should == 5
9
+ # test vector for fnv1a_64 for "feedfacedeadbeef" -> 0xcac54572bb1a6fc8; expected hash i is (upper 32 bits) + (lower 32 bits) * (i + 1)
10
+ h.should == Array.new(5) {|i| ((0xcac54572bb1a6fc8 & 0xFFFFFFFF00000000) >> 32) + (0xcac54572bb1a6fc8 & 0xFFFFFFFF) * (i + 1)}
11
+ end
12
+
13
+ it "should find m and k" do
14
+ Bloombroom::BloomHelper.find_m_k(10000, 0.01).should == [95851, 7]
15
+ Bloombroom::BloomHelper.find_m_k(10000, 0.001).should == [143776, 10]
16
+ end
17
+
18
+ end
@@ -0,0 +1,107 @@
1
+ require 'spec_helper'
2
+ # require 'bloombroom/filter/continuous_bloom_filter'
3
+ # require 'bloombroom/filter/bloom_helper'
4
+
5
+ describe Bloombroom::ContinuousBloomFilter do
6
+
7
+ it "should add" do
8
+ bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(10, 0.001), 0)
9
+ bf.include?("abc1").should be_false
10
+ bf.include?("abc2").should be_false
11
+ bf.include?("abc3").should be_false
12
+
13
+ bf.add("abc1")
14
+ bf.include?("abc1").should be_true
15
+ bf.include?("abc2").should be_false
16
+ bf.include?("abc3").should be_false
17
+
18
+ bf.add("abc2")
19
+ bf.include?("abc1").should be_true
20
+ bf.include?("abc2").should be_true
21
+ bf.include?("abc3").should be_false
22
+
23
+ bf.add("abc3")
24
+ bf.include?("abc1").should be_true
25
+ bf.include?("abc2").should be_true
26
+ bf.include?("abc3").should be_true
27
+ end
28
+
29
+ it "should find m and k" do
30
+ bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(10000, 0.001), 0)
31
+ bf.m.should == 143776
32
+ bf.k.should == 10
33
+ end
34
+
35
+ it "should expire" do
36
+ bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(100, 0.001), 0)
37
+ bf.add("abc1")
38
+ bf.include?("abc1").should be_true
39
+
40
+ bf.inc_time_slot
41
+ bf.add("abc2")
42
+ bf.include?("abc1").should be_true
43
+ bf.include?("abc2").should be_true
44
+
45
+ bf.inc_time_slot
46
+ bf.add("abc3")
47
+ bf.include?("abc1").should be_true
48
+ bf.include?("abc2").should be_true
49
+ bf.include?("abc3").should be_true
50
+
51
+ bf.inc_time_slot
52
+ bf.add("abc4")
53
+ bf.include?("abc1").should be_false
54
+ bf.include?("abc2").should be_true
55
+ bf.include?("abc3").should be_true
56
+ bf.include?("abc4").should be_true
57
+
58
+ bf.inc_time_slot
59
+ bf.include?("abc1").should be_false
60
+ bf.include?("abc2").should be_false
61
+ bf.include?("abc3").should be_true
62
+ bf.include?("abc4").should be_true
63
+
64
+ bf.inc_time_slot
65
+ bf.include?("abc1").should be_false
66
+ bf.include?("abc2").should be_false
67
+ bf.include?("abc3").should be_false
68
+ bf.include?("abc4").should be_true
69
+
70
+ bf.inc_time_slot
71
+ bf.include?("abc1").should be_false
72
+ bf.include?("abc2").should be_false
73
+ bf.include?("abc3").should be_false
74
+ bf.include?("abc4").should be_false
75
+
76
+ bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(100, 0.1), 0)
77
+ keys = []
78
+ 1.upto(100) do |i|
79
+ keys << "#{i}test#{i}"
80
+ bf.add(keys.last)
81
+ alive = keys[[keys.size - 3, 0].max, 3]
82
+ expired = keys - alive
83
+
84
+ alive.each{|key| bf.include?(key).should be_true}
85
+ expired.each{|key| bf.include?(key).should be_false}
86
+
87
+ bf.inc_time_slot
88
+ end
89
+ end
90
+
91
+ it "should compute elapse" do
92
+ bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(100, 0.1), 0)
93
+ bf.send(:elapsed, 1, 1).should == 0
94
+ bf.send(:elapsed, 1, 2).should == 1
95
+ bf.send(:elapsed, 1, 3).should == 2
96
+
97
+ bf.send(:elapsed, 2, 14).should == 12
98
+ bf.send(:elapsed, 2, 15).should == 13
99
+ bf.send(:elapsed, 2, 1).should == 14
100
+ bf.send(:elapsed, 3, 1).should == 13
101
+ bf.send(:elapsed, 15, 1).should == 1
102
+ bf.send(:elapsed, 15, 2).should == 2
103
+ bf.send(:elapsed, 15, 14).should == 14
104
+ end
105
+
106
+
107
+ end