bloom_filter 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # bloom_filter
2
+
3
+ A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.
4
+
5
+ If you don't know what a bloom filter is, you should read up on it: [http://en.wikipedia.org/wiki/Bloom_filter](http://en.wikipedia.org/wiki/Bloom_filter)
6
+
7
+
8
+ ## Usage
9
+ You can use it as an in-process data structure:
10
+
11
+ bloom_filter = BloomFilter.new(100, 3) # 100 = bits, 3 = hash functions
12
+ bloom_filter.add("hello")
13
+ bloom_filter.include?("hello") #=> true
14
+
15
+ or you can use it as a service:
16
+
17
+ bloom_filter = BloomFilter::Client.new("localhost", 4111)
18
+ bloom_filter.add("hello") # weeeee bits flying over network IO
19
+ bloom_filter.include?("hello") #=> true
20
+
21
+ To run it as a service, run:
22
+
23
+ bloom_filter_server -i localhost:4111 -n 1000000 -p 0.05
24
+ # -i is interface
25
+ # -n is estimated number of elements
26
+ # -p is desired false positive probability
27
+
28
+ You can also create an in-process bloom filter by specifying the estimated number of elements and the desired false positive probability:
29
+
30
+ BloomFilter.new(*BloomFilter.optimal_values(1000000, 0.05))
31
+
32
+ ### Saving your bloom filter
33
+ You can dump/load your bloom filter:
34
+
35
+ In process:
36
+
37
+ dumped = bloom_filter.dump
38
+ new_bloom_filter = BloomFilter.load(dumped)
39
+
40
+ Remote service:
41
+
42
+ bloom_filter.dump(path_to_file)
43
+ bloom_filter.load(path_to_file)
44
+
45
+
46
+ ### TODO
47
+ * Better documentation
48
+ * CLI errors
49
+ * CLI help command
50
+ * Improve load/dump workflow
51
+
52
+
53
+
54
+ Bloom filters are awesome, btw.
data/Rakefile CHANGED
@@ -5,12 +5,13 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "bloom_filter"
8
- gem.summary = %Q{A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.}
8
+ gem.summary = %Q{A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.}
9
9
  gem.description = %Q{}
10
10
  gem.email = "misterfunnyarsal@gmail.com"
11
11
  gem.homepage = "http://github.com/arya/bloom_filter"
12
12
  gem.authors = ["Arya Asemanfar"]
13
13
  gem.add_development_dependency "thoughtbot-shoulda"
14
+ gem.add_development_dependency "mocha"
14
15
  gem.add_dependency "eventmachine", ">=0.12.8"
15
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
17
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.6.0
@@ -0,0 +1,69 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{bloom_filter}
8
+ s.version = "0.6.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Arya Asemanfar"]
12
+ s.date = %q{2010-01-23}
13
+ s.default_executable = %q{bloom_filter_server}
14
+ s.description = %q{}
15
+ s.email = %q{misterfunnyarsal@gmail.com}
16
+ s.executables = ["bloom_filter_server"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.md"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "LICENSE",
25
+ "README.md",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/bloom_filter_server",
29
+ "bloom_filter.gemspec",
30
+ "lib/bloom_filter.rb",
31
+ "lib/bloom_filter/client.rb",
32
+ "lib/bloom_filter/protocol.rb",
33
+ "lib/bloom_filter/server.rb",
34
+ "test/bloom_filter_client_test.rb",
35
+ "test/bloom_filter_server_test.rb",
36
+ "test/bloom_filter_test.rb",
37
+ "test/test_helper.rb"
38
+ ]
39
+ s.homepage = %q{http://github.com/arya/bloom_filter}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.5}
43
+ s.summary = %q{A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.}
44
+ s.test_files = [
45
+ "test/bloom_filter_client_test.rb",
46
+ "test/bloom_filter_server_test.rb",
47
+ "test/bloom_filter_test.rb",
48
+ "test/test_helper.rb"
49
+ ]
50
+
51
+ if s.respond_to? :specification_version then
52
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
53
+ s.specification_version = 3
54
+
55
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
56
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
57
+ s.add_development_dependency(%q<mocha>, [">= 0"])
58
+ s.add_runtime_dependency(%q<eventmachine>, [">= 0.12.8"])
59
+ else
60
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
61
+ s.add_dependency(%q<mocha>, [">= 0"])
62
+ s.add_dependency(%q<eventmachine>, [">= 0.12.8"])
63
+ end
64
+ else
65
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
66
+ s.add_dependency(%q<mocha>, [">= 0"])
67
+ s.add_dependency(%q<eventmachine>, [">= 0.12.8"])
68
+ end
69
+ end
@@ -75,7 +75,9 @@ class BloomFilter
75
75
  end
76
76
  when Protocol::LOAD
77
77
  begin
78
- self.class.filter = BloomFilter.load(File.read(request_body))
78
+ File.open(request_body, "r") do |file|
79
+ self.class.filter.replace(file)
80
+ end
79
81
  write_response(true)
80
82
  rescue Exception => e
81
83
  write_response(false)
data/lib/bloom_filter.rb CHANGED
@@ -10,6 +10,18 @@ class BloomFilter
10
10
  [m.round, k.round]
11
11
  end
12
12
 
13
+ def self.read(file)
14
+ m = file.gets(DUMP_SEPARATOR).to_i
15
+ k = file.gets(DUMP_SEPARATOR).to_i
16
+ bits = Array.new((m.to_f / BITS_PER_FIXNUM).ceil, 0)
17
+ index = 0
18
+ while line = file.gets(DUMP_SEPARATOR)
19
+ bits[index] = line.to_i
20
+ index += 1
21
+ end
22
+ [m, k, bits]
23
+ end
24
+
13
25
  def self.load(dumped)
14
26
  m, k, *bits = dumped.split(DUMP_SEPARATOR).collect { |v| v.to_i }
15
27
  new(m, k, bits)
@@ -25,8 +37,11 @@ class BloomFilter
25
37
  @k.times do |i|
26
38
  self.set_bit(Zlib.crc32("#{i}#{el}") % @m)
27
39
  end
40
+ self
28
41
  end
29
42
 
43
+ alias_method :<<, :add
44
+
30
45
  def include?(el)
31
46
  @k.times do |i|
32
47
  return false if !bit_set?(Zlib.crc32("#{i}#{el}") % @m)
@@ -34,10 +49,20 @@ class BloomFilter
34
49
  true
35
50
  end
36
51
 
52
+ def &(els)
53
+ els.select { |el| self.include?(el) }
54
+ end
55
+
37
56
  def dump
38
57
  [@m, @k, *@bits].join(DUMP_SEPARATOR)
39
58
  end
40
59
 
60
+ def replace(file)
61
+ m, k, bits = self.class.read(file)
62
+ @m, @k = m, k
63
+ @bits.replace(bits)
64
+ end
65
+
41
66
  protected
42
67
  def set_bit(n)
43
68
  index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM
@@ -0,0 +1,61 @@
1
+ require 'test_helper'
2
+ require 'bloom_filter/client'
3
+
4
+ class BloomFilterClientTest < Test::Unit::TestCase
5
+ context "with a client object" do
6
+ setup do
7
+ @socket = mock()
8
+ TCPSocket.expects(:new).with("localhost", 4111).returns(@socket)
9
+ IO.stubs(:select).returns(@socket)
10
+ @client = BloomFilter::Client.new("localhost", 4111, :timeout => 1)
11
+ end
12
+
13
+ should "add elements" do
14
+ @socket.expects(:write).with("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello")
15
+ @socket.expects(:read).with(4).returns([1].pack("N"))
16
+ @socket.expects(:read).with(1).returns("1")
17
+ assert @client.add("hello")
18
+ end
19
+
20
+ should "check for elements" do
21
+ @socket.expects(:write).with("#{[6].pack("N")}#{BloomFilter::Protocol::INCLUDE}hello")
22
+ @socket.expects(:read).with(4).returns([1].pack("N"))
23
+ @socket.expects(:read).with(1).returns("1")
24
+ assert @client.include?("hello")
25
+ end
26
+
27
+ should "return false for elements that arent there" do
28
+ @socket.expects(:write).with("#{[4].pack("N")}#{BloomFilter::Protocol::INCLUDE}bye")
29
+ @socket.expects(:read).with(4).returns([1].pack("N"))
30
+ @socket.expects(:read).with(1).returns("0")
31
+ assert !@client.include?("bye")
32
+ end
33
+
34
+ should "check for multiple elements" do
35
+ @socket.expects(:write).with("#{[10].pack("N")}#{BloomFilter::Protocol::INCLUDE_MANY}hello,bye")
36
+ @socket.expects(:read).with(4).returns([2].pack("N"))
37
+ @socket.expects(:read).with(2).returns("10")
38
+ assert_equal ["hello"], @client & ["hello", "bye"]
39
+ end
40
+
41
+ should "send dump" do
42
+ @socket.expects(:write).with("#{[7].pack("N")}#{BloomFilter::Protocol::DUMP}/tmp/f")
43
+ @socket.expects(:read).with(4).returns([1].pack("N"))
44
+ @socket.expects(:read).with(1).returns("1")
45
+ assert @client.dump("/tmp/f")
46
+ end
47
+
48
+ should "send load" do
49
+ @socket.expects(:write).with("#{[7].pack("N")}#{BloomFilter::Protocol::LOAD}/tmp/f")
50
+ @socket.expects(:read).with(4).returns([1].pack("N"))
51
+ @socket.expects(:read).with(1).returns("1")
52
+ assert @client.load("/tmp/f")
53
+ end
54
+
55
+ should "timeout" do
56
+ @socket.expects(:write).with("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello")
57
+ IO.expects(:select).with([@socket], nil, nil, 1)
58
+ @client.add("hello")
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,81 @@
1
+ require 'test_helper'
2
+ require 'bloom_filter/server'
3
+
4
+ class BloomFilterServerTest < Test::Unit::TestCase
5
+ def expects_send_data(str)
6
+ @em.expects(:send_data).with('sig', str, str.size)
7
+ end
8
+
9
+ context "with a server" do
10
+ setup do
11
+ @server = BloomFilter.new_server(100, 0.1).new("sig")
12
+ @em = EventMachine
13
+ @filter = @server.class.filter
14
+ end
15
+
16
+ should "add elements" do
17
+ @filter.expects(:add).with("hello")
18
+ expects_send_data("#{[1].pack('N')}1")
19
+
20
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello")
21
+ end
22
+
23
+ should "check for elements" do
24
+ @filter.expects(:include?).with("hello").returns(true)
25
+ expects_send_data("#{[1].pack("N")}1")
26
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::INCLUDE}hello")
27
+ end
28
+
29
+ should "check for multiple elements" do
30
+ @filter.expects(:include?).with("hello").returns(true)
31
+ @filter.expects(:include?).with("bye").returns(false)
32
+ expects_send_data("#{[2].pack("N")}10")
33
+ @server.receive_data("#{[10].pack("N")}#{BloomFilter::Protocol::INCLUDE_MANY}hello,bye")
34
+ end
35
+
36
+
37
+ should "send dump" do
38
+ path = "/tmp/f"
39
+ @filter.expects(:dump).returns("dumped_data")
40
+
41
+ file = mock()
42
+ File.expects(:open).with(path, 'w').yields(file)
43
+ file.expects(:write).with("dumped_data")
44
+
45
+ expects_send_data("#{[1].pack("N")}1")
46
+
47
+
48
+ @server.receive_data("#{[path.size + 1].pack("N")}#{BloomFilter::Protocol::DUMP}#{path}")
49
+ end
50
+
51
+ should "send load" do
52
+ path = "/tmp/f"
53
+ file = mock()
54
+ @filter.expects(:replace).with(file)
55
+ File.expects(:open).with(path, 'r').yields(file)
56
+
57
+ expects_send_data("#{[1].pack("N")}1")
58
+
59
+ @server.receive_data("#{[path.size + 1].pack("N")}#{BloomFilter::Protocol::LOAD}#{path}")
60
+ end
61
+
62
+
63
+ should "properly buffer partial messages" do
64
+ @filter.expects(:add).with("hello")
65
+ expects_send_data("#{[1].pack('N')}1")
66
+
67
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hel")
68
+ @server.receive_data("lo")
69
+ end
70
+
71
+
72
+ should "handle two messages sent at once" do
73
+ @filter.expects(:add).with("hello")
74
+ expects_send_data("#{[1].pack('N')}1").twice()
75
+ @filter.expects(:add).with("bye")
76
+
77
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello#{[4].pack("N")}#{BloomFilter::Protocol::ADD}bye")
78
+ end
79
+
80
+ end
81
+ end
@@ -1,7 +1,54 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class BloomFilterTest < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
4
+ context "with a bloom filter" do
5
+ setup do
6
+ @bloom_filter = BloomFilter.new(1000, 3)
7
+ end
8
+
9
+ should "not claim anything is in the set when its empty" do
10
+ assert !@bloom_filter.include?("hello world")
11
+ end
12
+
13
+ should "claim an item we just added is in the set" do
14
+ @bloom_filter.add("hello world")
15
+ assert @bloom_filter.include?("hello world")
16
+ end
17
+
18
+ should "dump into a string" do
19
+ assert @bloom_filter.dump.is_a?(String)
20
+ end
21
+
22
+ should "a loaded set should claim the element we just added is in the set" do
23
+ @bloom_filter.add("hello world")
24
+ loaded = BloomFilter.load(@bloom_filter.dump)
25
+ assert loaded.include?("hello world")
26
+ end
27
+
28
+ should "replace itself with another filter" do
29
+ @bloom_filter.add("hello world")
30
+ replacement = BloomFilter.new(1000, 3)
31
+ io = StringIO.new(replacement.dump)
32
+ @bloom_filter.replace(io)
33
+ assert !@bloom_filter.include?("hello world")
34
+ end
35
+
36
+ should "allow chaining of adding" do
37
+ @bloom_filter << "hello" << "world"
38
+ assert @bloom_filter.include?("hello")
39
+ assert @bloom_filter.include?("world")
40
+ end
41
+
42
+ should "intersect with an array" do
43
+ @bloom_filter << "hello" << "world"
44
+ assert_equal ["hello", "world"], @bloom_filter & ["hello", "world", "bye", "moon"]
45
+ end
46
+ end
47
+
48
+ should "create a new filter from the dumped file of another" do
49
+ bloom_filter = BloomFilter.new(1000, 3)
50
+ bloom_filter.add("hello world")
51
+ bloom_filter_2 = BloomFilter.load(bloom_filter.dump)
52
+ assert bloom_filter_2.include?("hello world")
6
53
  end
7
54
  end
data/test/test_helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'test/unit'
3
3
  require 'shoulda'
4
+ require 'mocha'
4
5
 
5
6
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bloom_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arya Asemanfar
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-22 00:00:00 -08:00
12
+ date: 2010-01-23 00:00:00 -08:00
13
13
  default_executable: bloom_filter_server
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,6 +22,16 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: mocha
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
25
35
  - !ruby/object:Gem::Dependency
26
36
  name: eventmachine
27
37
  type: :runtime
@@ -40,19 +50,22 @@ extensions: []
40
50
 
41
51
  extra_rdoc_files:
42
52
  - LICENSE
43
- - README.rdoc
53
+ - README.md
44
54
  files:
45
55
  - .document
46
56
  - .gitignore
47
57
  - LICENSE
48
- - README.rdoc
58
+ - README.md
49
59
  - Rakefile
50
60
  - VERSION
51
61
  - bin/bloom_filter_server
62
+ - bloom_filter.gemspec
52
63
  - lib/bloom_filter.rb
53
64
  - lib/bloom_filter/client.rb
54
65
  - lib/bloom_filter/protocol.rb
55
66
  - lib/bloom_filter/server.rb
67
+ - test/bloom_filter_client_test.rb
68
+ - test/bloom_filter_server_test.rb
56
69
  - test/bloom_filter_test.rb
57
70
  - test/test_helper.rb
58
71
  has_rdoc: true
@@ -82,7 +95,9 @@ rubyforge_project:
82
95
  rubygems_version: 1.3.5
83
96
  signing_key:
84
97
  specification_version: 3
85
- summary: A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.
98
+ summary: A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.
86
99
  test_files:
100
+ - test/bloom_filter_client_test.rb
101
+ - test/bloom_filter_server_test.rb
87
102
  - test/bloom_filter_test.rb
88
103
  - test/test_helper.rb
data/README.rdoc DELETED
@@ -1,18 +0,0 @@
1
- = bloom_filter
2
-
3
- Description goes here.
4
-
5
- == Note on Patches/Pull Requests
6
-
7
- * Fork the project.
8
- * Make your feature addition or bug fix.
9
- * Add tests for it. This is important so I don't break it in a
10
- future version unintentionally.
11
- * Commit, do not mess with rakefile, version, or history.
12
- (if you want to have your own version, that is fine but
13
- bump version in a commit by itself I can ignore when I pull)
14
- * Send me a pull request. Bonus points for topic branches.
15
-
16
- == Copyright
17
-
18
- Copyright (c) 2010 Arya Asemanfar. See LICENSE for details.