bloombroom 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
module Bloombroom
  # Memory introspection for the running Ruby process.
  #
  # Inside the Bloombroom namespace this class hides Ruby's core Process
  # module, which is why the core module is addressed as ::Process below.
  class Process

    # Resident set size (RSS) of the current process.
    #
    # `ps` is asked first. When it is not installed, or prints nothing that
    # starts with a number, the VmRSS entry of /proc/self/status is used
    # instead.
    #
    # @return [Integer] the RSS figure reported by the first source that
    #   answers (both sources report kilobytes on Linux), or 0 when neither
    #   source is available
    def self.rss
      rss_from_ps || rss_from_procfs || 0
    end

    # @return [Integer, nil] the number printed by `ps -o rss=` for this
    #   process, or nil when ps is missing or printed no number
    def self.rss_from_ps
      output = `ps -o rss= -p #{::Process.pid}`
      digits = output[/\A\s*(\d+)/, 1]
      digits && digits.to_i
    rescue SystemCallError
      # `ps` could not be executed at all (e.g. Errno::ENOENT)
      nil
    end
    private_class_method :rss_from_ps

    # @return [Integer, nil] the VmRSS value from /proc/self/status, or nil
    #   when that file is missing or has no VmRSS line
    def self.rss_from_procfs
      digits = File.read("/proc/self/status")[/^VmRSS:\s*(\d+)/, 1]
      digits && digits.to_i
    rescue SystemCallError
      nil
    end
    private_class_method :rss_from_procfs
  end
end
@@ -0,0 +1,28 @@
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require "bloombroom/version"

# Gem specification for bloombroom: metadata, packaged files, the native
# extension hook and the runtime/development dependencies.
Gem::Specification.new do |s|
  s.name        = "bloombroom"
  s.version     = Bloombroom::VERSION
  s.authors     = ["Colin Surprenant"]
  s.email       = ["colin.surprenant@gmail.com"]
  s.homepage    = "https://github.com/colinsurprenant/bloombroom"
  s.summary     = "bloom filters for bounded and unbounded (streaming) data, fast C/FFI FNV hashing and bit fields"
  # Adjacent string literals instead of a backslash continuation inside one
  # literal, so source indentation can never end up inside the description.
  s.description = "bloombroom has two bloom filter implementations, a standard filter for bounded key space " \
                  "and a continuous filter for unbounded keys (stream). also contains fast C/FFI FNV hashing and fast bit field and " \
                  "bit bucket field (multi bits)."

  # Everything tracked by git is packaged; executables and test files are
  # derived from that same list.
  s.files         = `git ls-files`.split($/)
  s.executables   = s.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]
  # A Rakefile listed as an extension is run by RubyGems at install time.
  s.extensions    = ["ffi/bloombroom/hash/Rakefile"]

  # SPDX identifier, the form RubyGems expects for the license field.
  s.license = "Apache-2.0"

  s.add_dependency "ffi", ">= 1.0.0"
  s.add_dependency "ffi-compiler"
  # Capped below 3.0: the specs rely on RSpec 2 matchers (be_true/be_false),
  # which RSpec 3 no longer provides.
  s.add_development_dependency "rspec", "~> 2.8"
end
@@ -0,0 +1,5 @@
require 'ffi-compiler/compile_task'

# Compile task for the 'ffi_fnv' library; the task is left with its default
# settings (nothing is configured yet).
FFI::Compiler::CompileTask.new('ffi_fnv') { |_task| }
@@ -1,13 +1,17 @@
1
+ require 'ffi'
2
+ require 'ffi-compiler/loader'
3
+
4
+ module Bloombroom
5
+ class FNVFFI
6
+ extend FFI::Library
7
+ ffi_lib FFI::Compiler::Loader.find('ffi_fnv')
8
+ end
9
+ end
10
+
1
11
  require "bloombroom/version"
2
12
  require "bloombroom/bits/bit_field"
3
13
  require "bloombroom/bits/bit_bucket_field"
4
14
  require "bloombroom/filter/bloom_helper"
5
15
  require "bloombroom/filter/bloom_filter"
6
16
  require "bloombroom/filter/continuous_bloom_filter"
7
- require "bloombroom/hash/fnv_a"
8
- require "bloombroom/hash/fnv_b"
9
- require "bloombroom/hash/cext_fnv"
10
17
  require "bloombroom/hash/ffi_fnv"
11
-
12
- module Bloombroom
13
- end
@@ -2,14 +2,10 @@ require 'ffi'
2
2
 
3
3
  module Bloombroom
4
4
  class FNVFFI
5
- extend FFI::Library
6
-
7
- ffi_lib File.dirname(__FILE__) + "/" + (FFI::Platform.mac? ? "ffi_fnv.bundle" : FFI.map_library_name("ffi_fnv"))
8
-
9
- attach_function :c_fnv1_32, :fnv1_32, [:string, :uint32], :uint32
10
- attach_function :c_fnv1a_32, :fnv1a_32, [:string, :uint32], :uint32
11
- attach_function :c_fnv1_64, :fnv1_64, [:string, :uint32], :uint64
12
- attach_function :c_fnv1a_64, :fnv1a_64, [:string, :uint32], :uint64
5
+ attach_function :c_fnv1_32, :fnv1_32, [:buffer_in, :uint32], :uint32, :save_errno => false
6
+ attach_function :c_fnv1a_32, :fnv1a_32, [:buffer_in, :uint32], :uint32, :save_errno => false
7
+ attach_function :c_fnv1_64, :fnv1_64, [:buffer_in, :uint32], :uint64, :save_errno => false
8
+ attach_function :c_fnv1a_64, :fnv1a_64, [:buffer_in, :uint32], :uint64, :save_errno => false
13
9
 
14
10
  def self.fnv1_32(data)
15
11
  c_fnv1_32(data, data.size)
@@ -1,3 +1,3 @@
1
1
  module Bloombroom
2
- VERSION = "1.0.0"
2
+ VERSION = "1.2.0"
3
3
  end
@@ -0,0 +1,142 @@
require 'spec_helper'
# require 'bloombroom/bits/bit_bucket_field'

# Specs for Bloombroom::BitBucketField. Throughout, the field is built as
# BitBucketField.new(bits_per_bucket, bucket_count): new(4, 56) reports a size
# of 56 and new(4, 1) prints as a single 4-digit bit group.
describe Bloombroom::BitBucketField do

  it "should be all unset at initialization" do
    bf = Bloombroom::BitBucketField.new(8, 100)
    100.times { |i| bf[i].should == 0 }
    bf.total_set.should == 0
  end

  it "should set value" do
    bf = Bloombroom::BitBucketField.new(4, 16)
    probed = (0..8).to_a

    # Even buckets take each 4-bit value; the odd neighbours must stay at 0.
    # Buckets are written and read in ascending order so a write that spills
    # into a neighbouring bucket is still caught.
    16.times do |value|
      probed.select(&:even?).each { |b| bf[b] = value }
      probed.each { |b| bf[b].should == (b.even? ? value : 0) }
    end

    # Roles swapped: odd buckets take the value, even buckets are reset to 0.
    16.times do |value|
      probed.each { |b| bf[b] = (b.odd? ? value : 0) }
      probed.each { |b| bf[b].should == (b.odd? ? value : 0) }
    end

    # Every bucket of the field holds the same value.
    16.times do |value|
      16.times { |b| bf[b] = value }
      16.times { |b| bf[b].should == value }
    end
  end

  it "should randomly set" do
    bf = Bloombroom::BitBucketField.new(4, 1000)
    random_buckets = Array.new(500) { rand(1000) }.uniq
    random_values = Array.new(random_buckets.size) { rand(16) } # values between 0 and 15
    bucket_value = random_buckets.zip(random_values)
    other_buckets = (0...1000).to_a - random_buckets

    bucket_value.each { |b, v| bf[b] = v }
    other_buckets.each { |i| bf[i].should == 0 }
    bucket_value.each { |b, v| bf[b].should == v }

    # zeroing the untouched buckets must not disturb the written ones
    other_buckets.each { |i| bf[i] = 0 }
    other_buckets.each { |i| bf[i].should == 0 }
    bucket_value.each { |b, v| bf[b].should == v }

    random_buckets.each { |i| bf[i] = 0 }
    other_buckets.each { |i| bf[i].should == 0 }
    random_buckets.each { |i| bf[i].should == 0 }
  end

  it "should report size" do
    bf = Bloombroom::BitBucketField.new(4, 56)
    bf.size.should == 56
  end

  it "should report total_set" do
    bf = Bloombroom::BitBucketField.new(16, 100)
    max_value = (2 ** 16) - 1
    [0, 1, 2, 4].each { |b| bf[b] = max_value }
    bf[50] = 1
    bf.total_set.should == 5

    bf = Bloombroom::BitBucketField.new(4, 1000)
    random_buckets = Array.new(500) { rand(1000) }.uniq
    # rand(15) is 0..14, so adding 1 gives the full non-zero range of a 4-bit bucket
    random_values = Array.new(random_buckets.size) { rand(15) + 1 } # values between 1 and 15
    bucket_value = random_buckets.zip(random_values)
    other_buckets = (0...1000).to_a - random_buckets

    bucket_value.each { |b, v| bf[b] = v }
    bf.total_set.should == random_buckets.size

    # writing 0 into buckets that were never set must not change the count
    other_buckets.each { |i| bf[i] = 0 }
    other_buckets.each { |i| bf[i].should == 0 }
    bf.total_set.should == bucket_value.size

    random_buckets.each { |i| bf[i] = 0 }
    bf.total_set.should == 0
  end

  it "should produce bit string using to_s" do
    bf = Bloombroom::BitBucketField.new(4, 1)
    bf[0] = 1
    bf.to_s.should == "0001"
    bf.to_s(10).should == "1"
    bf[0] = 15
    bf.to_s.should == "1111"
    bf.to_s(10).should == "15"

    bf = Bloombroom::BitBucketField.new(4, 2)
    bf[0] = 3
    bf[1] = 8
    bf.to_s.should == "0011 1000"
    bf.to_s(10).should == "3 8"

    bf = Bloombroom::BitBucketField.new(4, 8)
    bf[0] = 1
    bf[2] = 2
    bf[4] = 3
    bf[6] = 4
    bf.to_s.should == "0001 0000 0010 0000 0011 0000 0100 0000"
    bf.to_s(10).should == "1 0 2 0 3 0 4 0"

    # to_s(16) is expected to raise
    lambda { bf.to_s(16) }.should raise_error
  end

end
@@ -0,0 +1,108 @@
require 'spec_helper'
# require 'bloombroom/bits/bit_field'

# Specs for Bloombroom::BitField: single-bit reads and writes through
# [] / []=, set / unset, include? / zero?, plus size, to_s and total_set.
describe Bloombroom::BitField do

  it "should be all unset at initialization" do
    bf = Bloombroom::BitField.new(100)
    100.times do |bit|
      bf[bit].should == 0
      bf.include?(bit).should be_false
    end
  end

  it "should set and unset" do
    bf = Bloombroom::BitField.new(1000)
    [100, 101].each do |bit|
      bf[bit].should == 0
      bf.include?(bit).should be_false
    end

    # through the index operators
    bf[100] = 1
    bf[100].should == 1
    bf.include?(100).should be_true
    bf[100] = 0
    bf[100].should == 0
    bf.include?(100).should be_false

    # through set / unset
    bf.set(101)
    bf[101].should == 1
    bf.include?(101).should be_true
    bf.unset(101)
    bf[101].should == 0
    bf.include?(101).should be_false
  end

  it "should unset" do
    bf = Bloombroom::BitField.new(100)
    everything = (0..99)
    low = (0..31)
    high = (32..99)

    # unsetting bits that were never set leaves the field empty
    everything.each { |bit| bf.include?(bit).should be_false }
    low.each { |bit| bf.unset(bit) }
    everything.each { |bit| bf.include?(bit).should be_false }

    low.each { |bit| bf.set(bit) }
    low.each { |bit| bf.include?(bit).should be_true }
    high.each { |bit| bf.include?(bit).should be_false }

    unsetbits = [0, 5, 6, 10, 16, 23, 31]
    unsetbits.each { |bit| bf.unset(bit) }
    (low.to_a - unsetbits).each { |bit| bf.include?(bit).should be_true }
    unsetbits.each { |bit| bf.include?(bit).should be_false }
    high.each { |bit| bf.include?(bit).should be_false }
  end

  it "should randomly set and unset" do
    bf = Bloombroom::BitField.new(1000)
    random_bits = Array.new(251) { rand(1000) }
    other_bits = (0..999).to_a - random_bits

    random_bits.each { |bit| bf.set(bit) }
    other_bits.each { |bit| bf.include?(bit).should be_false }
    random_bits.each { |bit| bf.include?(bit).should be_true }

    other_bits.each { |bit| bf.unset(bit) }
    other_bits.each { |bit| bf.include?(bit).should be_false }
    random_bits.each { |bit| bf.include?(bit).should be_true }

    random_bits.each { |bit| bf.unset(bit) }
    other_bits.each { |bit| bf.include?(bit).should be_false }
    random_bits.each { |bit| bf.include?(bit).should be_false }
  end

  it "should randomly set and unset and support zero?" do
    bf = Bloombroom::BitField.new(1000)
    random_bits = Array.new(251) { rand(1000) }
    other_bits = (0..999).to_a - random_bits

    random_bits.each { |bit| bf.set(bit) }
    other_bits.each { |bit| bf.zero?(bit).should be_true }
    random_bits.each { |bit| bf.zero?(bit).should be_false }

    other_bits.each { |bit| bf.unset(bit) }
    other_bits.each { |bit| bf.zero?(bit).should be_true }
    random_bits.each { |bit| bf.zero?(bit).should be_false }

    random_bits.each { |bit| bf.unset(bit) }
    other_bits.each { |bit| bf.zero?(bit).should be_true }
    random_bits.each { |bit| bf.zero?(bit).should be_true }
  end

  it "should report size" do
    bf = Bloombroom::BitField.new(456)
    bf.size.should == 456
  end

  it "should produce bit string using to_s" do
    bf = Bloombroom::BitField.new(10)
    [1, 5].each { |bit| bf[bit] = 1 }
    bf.to_s.should == "0100010000"
  end

  it "should report total_set" do
    bf = Bloombroom::BitField.new(10)
    [1, 5].each { |bit| bf[bit] = 1 }
    bf.total_set.should == 2
  end

end
@@ -0,0 +1,43 @@
require 'spec_helper'
# require 'bloombroom'

# Specs for Bloombroom::BloomFilter: membership after add, the element
# counter, and construction from BloomHelper.find_m_k.
describe Bloombroom::BloomFilter do

  it "should add" do
    bf = Bloombroom::BloomFilter.new(1000, 5)
    keys = %w[abc1 abc2 abc3]
    keys.each { |key| bf.include?(key).should be_false }

    # after each add, only the keys added so far may be reported as present
    keys.each_with_index do |added, position|
      bf.add(added)
      keys.each_with_index do |key, idx|
        bf.include?(key).should(idx <= position ? be_true : be_false)
      end
    end
  end

  it "should keep track of size" do
    bf = Bloombroom::BloomFilter.new(1000, 5)
    bf.size.should == 0
    %w[abc1 abc2].each_with_index do |key, idx|
      bf.add(key)
      bf.size.should == idx + 1
    end
  end

  it "should find m and k" do
    m_and_k = Bloombroom::BloomHelper.find_m_k(10000, 0.001)
    bf = Bloombroom::BloomFilter.new(*m_and_k)
    bf.m.should == 143776
    bf.k.should == 10
  end

end
@@ -0,0 +1,18 @@
require 'spec_helper'
# require 'bloombroom/filter/bloom_helper'

# Specs for Bloombroom::BloomHelper: derivation of several hash values from
# one 64-bit digest, and sizing of a filter through find_m_k.
describe Bloombroom::BloomHelper do

  it "should multi_hash" do
    h = Bloombroom::BloomHelper.multi_hash("feedfacedeadbeef", 5)
    h.size.should == 5

    # test vector for fnv1a_64 for "feedfacedeadbeef" -> 0xcac54572bb1a6fc8
    digest = 0xcac54572bb1a6fc8
    upper = (digest & 0xFFFFFFFF00000000) >> 32
    lower = digest & 0xFFFFFFFF
    # hash number n (1-based) is expected to be upper + lower * n
    h.should == (1..5).map { |n| upper + lower * n }
  end

  it "should find m and k" do
    { 0.01 => [95851, 7], 0.001 => [143776, 10] }.each do |error_rate, m_and_k|
      Bloombroom::BloomHelper.find_m_k(10000, error_rate).should == m_and_k
    end
  end

end
@@ -0,0 +1,107 @@
require 'spec_helper'
# require 'bloombroom/filter/continuous_bloom_filter'
# require 'bloombroom/filter/bloom_helper'

# Specs for Bloombroom::ContinuousBloomFilter. Filters are built from the
# pair returned by BloomHelper.find_m_k followed by a trailing 0 argument.
# NOTE(review): the meaning of that last argument is not visible from these
# specs - confirm against the filter's constructor.
describe Bloombroom::ContinuousBloomFilter do

  it "should add" do
    bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(10, 0.001), 0)
    keys = %w[abc1 abc2 abc3]
    keys.each { |key| bf.include?(key).should be_false }

    # after each add, only the keys added so far may be reported as present
    keys.each_with_index do |added, position|
      bf.add(added)
      keys.each_with_index do |key, idx|
        bf.include?(key).should(idx <= position ? be_true : be_false)
      end
    end
  end

  it "should find m and k" do
    m_and_k = Bloombroom::BloomHelper.find_m_k(10000, 0.001)
    bf = Bloombroom::ContinuousBloomFilter.new(*m_and_k, 0)
    bf.m.should == 143776
    bf.k.should == 10
  end

  it "should expire" do
    bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(100, 0.001), 0)
    # checks, in the given order, that each key is (or is not) reported by bf
    verify = lambda do |expectations|
      expectations.each do |key, present|
        bf.include?(key).should(present ? be_true : be_false)
      end
    end

    bf.add("abc1")
    verify.call("abc1" => true)

    bf.inc_time_slot
    bf.add("abc2")
    verify.call("abc1" => true, "abc2" => true)

    bf.inc_time_slot
    bf.add("abc3")
    verify.call("abc1" => true, "abc2" => true, "abc3" => true)

    # third slot change since abc1 was added: abc1 is no longer reported
    bf.inc_time_slot
    bf.add("abc4")
    verify.call("abc1" => false, "abc2" => true, "abc3" => true, "abc4" => true)

    bf.inc_time_slot
    verify.call("abc1" => false, "abc2" => false, "abc3" => true, "abc4" => true)

    bf.inc_time_slot
    verify.call("abc1" => false, "abc2" => false, "abc3" => false, "abc4" => true)

    bf.inc_time_slot
    verify.call("abc1" => false, "abc2" => false, "abc3" => false, "abc4" => false)

    # one key per time slot: only the three most recent keys may be present
    rolling = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(100, 0.1), 0)
    keys = []
    (1..100).each do |i|
      keys << "#{i}test#{i}"
      rolling.add(keys.last)
      alive = keys.last(3)
      expired = keys - alive

      alive.each { |key| rolling.include?(key).should be_true }
      expired.each { |key| rolling.include?(key).should be_false }

      rolling.inc_time_slot
    end
  end

  it "should compute elapse" do
    bf = Bloombroom::ContinuousBloomFilter.new(*Bloombroom::BloomHelper.find_m_k(100, 0.1), 0)
    # [first argument, second argument, expected result] for the private
    # elapsed method
    [
      [1, 1, 0],
      [1, 2, 1],
      [1, 3, 2],

      [2, 14, 12],
      [2, 15, 13],
      [2, 1, 14],
      [3, 1, 13],
      [15, 1, 1],
      [15, 2, 2],
      [15, 14, 14],
    ].each do |first, second, expected|
      bf.send(:elapsed, first, second).should == expected
    end
  end


end