bloom_filter 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # bloom_filter
2
+
3
+ A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.
4
+
5
+ If you don't know what a bloom filter is, you should read up on it: [http://en.wikipedia.org/wiki/Bloom_filter](http://en.wikipedia.org/wiki/Bloom_filter)
6
+
7
+
8
+ ## Usage
9
+ You can use it as an in-process data structure:
10
+
11
+ bloom_filter = BloomFilter.new(100, 3) # 100 = bits, 3 = hash functions
12
+ bloom_filter.add("hello")
13
+ bloom_filter.include?("hello") #=> true
14
+
15
+ or you can use it as a service:
16
+
17
+ bloom_filter = BloomFilter::Client.new("localhost", 4111)
18
+ bloom_filter.add("hello") # weeeee bits flying over network IO
19
+ bloom_filter.include?("hello") #=> true
20
+
21
+ To run it as a service, run:
22
+
23
+ bloom_filter_server -i localhost:4111 -n 1000000 -p 0.05
24
+ # -i is interface
25
+ # -n is estimated number of elements
26
+ # -p is desired false positive probability
27
+
28
+ You can also create an in-process bloom filter by specifying the estimated number of elements and the desired false positive probability:
29
+
30
+ BloomFilter.new(*BloomFilter.optimal_values(1000000, 0.05))
31
+
32
+ ### Saving your bloom filter
33
+ You can dump/load your bloom filter:
34
+
35
+ In process:
36
+
37
+ dumped = bloom_filter.dump
38
+ new_bloom_filter = BloomFilter.load(dumped)
39
+
40
+ Remote service:
41
+
42
+ bloom_filter.dump(path_to_file)
43
+ bloom_filter.load(path_to_file)
44
+
45
+
46
+ ### TODO
47
+ * Better documentation
48
+ * CLI errors
49
+ * CLI help command
50
+ * Improve load/dump workflow
51
+
52
+
53
+
54
+ Bloom filters are awesome, btw.
data/Rakefile CHANGED
@@ -5,12 +5,13 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "bloom_filter"
8
- gem.summary = %Q{A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.}
8
+ gem.summary = %Q{A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.}
9
9
  gem.description = %Q{}
10
10
  gem.email = "misterfunnyarsal@gmail.com"
11
11
  gem.homepage = "http://github.com/arya/bloom_filter"
12
12
  gem.authors = ["Arya Asemanfar"]
13
13
  gem.add_development_dependency "thoughtbot-shoulda"
14
+ gem.add_development_dependency "mocha"
14
15
  gem.add_dependency "eventmachine", ">=0.12.8"
15
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
17
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.5.0
1
+ 0.6.0
@@ -0,0 +1,69 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{bloom_filter}
8
+ s.version = "0.6.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Arya Asemanfar"]
12
+ s.date = %q{2010-01-23}
13
+ s.default_executable = %q{bloom_filter_server}
14
+ s.description = %q{}
15
+ s.email = %q{misterfunnyarsal@gmail.com}
16
+ s.executables = ["bloom_filter_server"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE",
19
+ "README.md"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ ".gitignore",
24
+ "LICENSE",
25
+ "README.md",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/bloom_filter_server",
29
+ "bloom_filter.gemspec",
30
+ "lib/bloom_filter.rb",
31
+ "lib/bloom_filter/client.rb",
32
+ "lib/bloom_filter/protocol.rb",
33
+ "lib/bloom_filter/server.rb",
34
+ "test/bloom_filter_client_test.rb",
35
+ "test/bloom_filter_server_test.rb",
36
+ "test/bloom_filter_test.rb",
37
+ "test/test_helper.rb"
38
+ ]
39
+ s.homepage = %q{http://github.com/arya/bloom_filter}
40
+ s.rdoc_options = ["--charset=UTF-8"]
41
+ s.require_paths = ["lib"]
42
+ s.rubygems_version = %q{1.3.5}
43
+ s.summary = %q{A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.}
44
+ s.test_files = [
45
+ "test/bloom_filter_client_test.rb",
46
+ "test/bloom_filter_server_test.rb",
47
+ "test/bloom_filter_test.rb",
48
+ "test/test_helper.rb"
49
+ ]
50
+
51
+ if s.respond_to? :specification_version then
52
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
53
+ s.specification_version = 3
54
+
55
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
56
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
57
+ s.add_development_dependency(%q<mocha>, [">= 0"])
58
+ s.add_runtime_dependency(%q<eventmachine>, [">= 0.12.8"])
59
+ else
60
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
61
+ s.add_dependency(%q<mocha>, [">= 0"])
62
+ s.add_dependency(%q<eventmachine>, [">= 0.12.8"])
63
+ end
64
+ else
65
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
66
+ s.add_dependency(%q<mocha>, [">= 0"])
67
+ s.add_dependency(%q<eventmachine>, [">= 0.12.8"])
68
+ end
69
+ end
@@ -75,7 +75,9 @@ class BloomFilter
75
75
  end
76
76
  when Protocol::LOAD
77
77
  begin
78
- self.class.filter = BloomFilter.load(File.read(request_body))
78
+ File.open(request_body, "r") do |file|
79
+ self.class.filter.replace(file)
80
+ end
79
81
  write_response(true)
80
82
  rescue Exception => e
81
83
  write_response(false)
data/lib/bloom_filter.rb CHANGED
@@ -10,6 +10,18 @@ class BloomFilter
10
10
  [m.round, k.round]
11
11
  end
12
12
 
13
+ def self.read(file)
14
+ m = file.gets(DUMP_SEPARATOR).to_i
15
+ k = file.gets(DUMP_SEPARATOR).to_i
16
+ bits = Array.new((m.to_f / BITS_PER_FIXNUM).ceil, 0)
17
+ index = 0
18
+ while line = file.gets(DUMP_SEPARATOR)
19
+ bits[index] = line.to_i
20
+ index += 1
21
+ end
22
+ [m, k, bits]
23
+ end
24
+
13
25
  def self.load(dumped)
14
26
  m, k, *bits = dumped.split(DUMP_SEPARATOR).collect { |v| v.to_i }
15
27
  new(m, k, bits)
@@ -25,8 +37,11 @@ class BloomFilter
25
37
  @k.times do |i|
26
38
  self.set_bit(Zlib.crc32("#{i}#{el}") % @m)
27
39
  end
40
+ self
28
41
  end
29
42
 
43
+ alias_method :<<, :add
44
+
30
45
  def include?(el)
31
46
  @k.times do |i|
32
47
  return false if !bit_set?(Zlib.crc32("#{i}#{el}") % @m)
@@ -34,10 +49,20 @@ class BloomFilter
34
49
  true
35
50
  end
36
51
 
52
+ def &(els)
53
+ els.select { |el| self.include?(el) }
54
+ end
55
+
37
56
  def dump
38
57
  [@m, @k, *@bits].join(DUMP_SEPARATOR)
39
58
  end
40
59
 
60
+ def replace(file)
61
+ m, k, bits = self.class.read(file)
62
+ @m, @k = m, k
63
+ @bits.replace(bits)
64
+ end
65
+
41
66
  protected
42
67
  def set_bit(n)
43
68
  index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM
@@ -0,0 +1,61 @@
1
+ require 'test_helper'
2
+ require 'bloom_filter/client'
3
+
4
+ class BloomFilterClientTest < Test::Unit::TestCase
5
+ context "with a client object" do
6
+ setup do
7
+ @socket = mock()
8
+ TCPSocket.expects(:new).with("localhost", 4111).returns(@socket)
9
+ IO.stubs(:select).returns(@socket)
10
+ @client = BloomFilter::Client.new("localhost", 4111, :timeout => 1)
11
+ end
12
+
13
+ should "add elements" do
14
+ @socket.expects(:write).with("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello")
15
+ @socket.expects(:read).with(4).returns([1].pack("N"))
16
+ @socket.expects(:read).with(1).returns("1")
17
+ assert @client.add("hello")
18
+ end
19
+
20
+ should "check for elements" do
21
+ @socket.expects(:write).with("#{[6].pack("N")}#{BloomFilter::Protocol::INCLUDE}hello")
22
+ @socket.expects(:read).with(4).returns([1].pack("N"))
23
+ @socket.expects(:read).with(1).returns("1")
24
+ assert @client.include?("hello")
25
+ end
26
+
27
+ should "return false for elements that arent there" do
28
+ @socket.expects(:write).with("#{[4].pack("N")}#{BloomFilter::Protocol::INCLUDE}bye")
29
+ @socket.expects(:read).with(4).returns([1].pack("N"))
30
+ @socket.expects(:read).with(1).returns("0")
31
+ assert !@client.include?("bye")
32
+ end
33
+
34
+ should "check for multiple elements" do
35
+ @socket.expects(:write).with("#{[10].pack("N")}#{BloomFilter::Protocol::INCLUDE_MANY}hello,bye")
36
+ @socket.expects(:read).with(4).returns([2].pack("N"))
37
+ @socket.expects(:read).with(2).returns("10")
38
+ assert_equal ["hello"], @client & ["hello", "bye"]
39
+ end
40
+
41
+ should "send dump" do
42
+ @socket.expects(:write).with("#{[7].pack("N")}#{BloomFilter::Protocol::DUMP}/tmp/f")
43
+ @socket.expects(:read).with(4).returns([1].pack("N"))
44
+ @socket.expects(:read).with(1).returns("1")
45
+ assert @client.dump("/tmp/f")
46
+ end
47
+
48
+ should "send load" do
49
+ @socket.expects(:write).with("#{[7].pack("N")}#{BloomFilter::Protocol::LOAD}/tmp/f")
50
+ @socket.expects(:read).with(4).returns([1].pack("N"))
51
+ @socket.expects(:read).with(1).returns("1")
52
+ assert @client.load("/tmp/f")
53
+ end
54
+
55
+ should "timeout" do
56
+ @socket.expects(:write).with("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello")
57
+ IO.expects(:select).with([@socket], nil, nil, 1)
58
+ @client.add("hello")
59
+ end
60
+ end
61
+ end
@@ -0,0 +1,81 @@
1
+ require 'test_helper'
2
+ require 'bloom_filter/server'
3
+
4
+ class BloomFilterServerTest < Test::Unit::TestCase
5
+ def expects_send_data(str)
6
+ @em.expects(:send_data).with('sig', str, str.size)
7
+ end
8
+
9
+ context "with a server" do
10
+ setup do
11
+ @server = BloomFilter.new_server(100, 0.1).new("sig")
12
+ @em = EventMachine
13
+ @filter = @server.class.filter
14
+ end
15
+
16
+ should "add elements" do
17
+ @filter.expects(:add).with("hello")
18
+ expects_send_data("#{[1].pack('N')}1")
19
+
20
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello")
21
+ end
22
+
23
+ should "check for elements" do
24
+ @filter.expects(:include?).with("hello").returns(true)
25
+ expects_send_data("#{[1].pack("N")}1")
26
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::INCLUDE}hello")
27
+ end
28
+
29
+ should "check for multiple elements" do
30
+ @filter.expects(:include?).with("hello").returns(true)
31
+ @filter.expects(:include?).with("bye").returns(false)
32
+ expects_send_data("#{[2].pack("N")}10")
33
+ @server.receive_data("#{[10].pack("N")}#{BloomFilter::Protocol::INCLUDE_MANY}hello,bye")
34
+ end
35
+
36
+
37
+ should "send dump" do
38
+ path = "/tmp/f"
39
+ @filter.expects(:dump).returns("dumped_data")
40
+
41
+ file = mock()
42
+ File.expects(:open).with(path, 'w').yields(file)
43
+ file.expects(:write).with("dumped_data")
44
+
45
+ expects_send_data("#{[1].pack("N")}1")
46
+
47
+
48
+ @server.receive_data("#{[path.size + 1].pack("N")}#{BloomFilter::Protocol::DUMP}#{path}")
49
+ end
50
+
51
+ should "send load" do
52
+ path = "/tmp/f"
53
+ file = mock()
54
+ @filter.expects(:replace).with(file)
55
+ File.expects(:open).with(path, 'r').yields(file)
56
+
57
+ expects_send_data("#{[1].pack("N")}1")
58
+
59
+ @server.receive_data("#{[path.size + 1].pack("N")}#{BloomFilter::Protocol::LOAD}#{path}")
60
+ end
61
+
62
+
63
+ should "properly buffer partial messages" do
64
+ @filter.expects(:add).with("hello")
65
+ expects_send_data("#{[1].pack('N')}1")
66
+
67
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hel")
68
+ @server.receive_data("lo")
69
+ end
70
+
71
+
72
+ should "handle two messages sent at once" do
73
+ @filter.expects(:add).with("hello")
74
+ expects_send_data("#{[1].pack('N')}1").twice()
75
+ @filter.expects(:add).with("bye")
76
+
77
+ @server.receive_data("#{[6].pack("N")}#{BloomFilter::Protocol::ADD}hello#{[4].pack("N")}#{BloomFilter::Protocol::ADD}bye")
78
+ end
79
+
80
+ end
81
+ end
@@ -1,7 +1,54 @@
1
1
  require 'test_helper'
2
2
 
3
3
  class BloomFilterTest < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
4
+ context "with a bloom filter" do
5
+ setup do
6
+ @bloom_filter = BloomFilter.new(1000, 3)
7
+ end
8
+
9
+ should "not claim anything is in the set when its empty" do
10
+ assert !@bloom_filter.include?("hello world")
11
+ end
12
+
13
+ should "claim an item we just added is in the set" do
14
+ @bloom_filter.add("hello world")
15
+ assert @bloom_filter.include?("hello world")
16
+ end
17
+
18
+ should "dump into a string" do
19
+ assert @bloom_filter.dump.is_a?(String)
20
+ end
21
+
22
+ should "a loaded set should claim the element we just added is in the set" do
23
+ @bloom_filter.add("hello world")
24
+ loaded = BloomFilter.load(@bloom_filter.dump)
25
+ assert loaded.include?("hello world")
26
+ end
27
+
28
+ should "replace itself with another filter" do
29
+ @bloom_filter.add("hello world")
30
+ replacement = BloomFilter.new(1000, 3)
31
+ io = StringIO.new(replacement.dump)
32
+ @bloom_filter.replace(io)
33
+ assert !@bloom_filter.include?("hello world")
34
+ end
35
+
36
+ should "allow chaining of adding" do
37
+ @bloom_filter << "hello" << "world"
38
+ assert @bloom_filter.include?("hello")
39
+ assert @bloom_filter.include?("world")
40
+ end
41
+
42
+ should "intersect with an array" do
43
+ @bloom_filter << "hello" << "world"
44
+ assert_equal ["hello", "world"], @bloom_filter & ["hello", "world", "bye", "moon"]
45
+ end
46
+ end
47
+
48
+ should "create a new filter from the dumped file of another" do
49
+ bloom_filter = BloomFilter.new(1000, 3)
50
+ bloom_filter.add("hello world")
51
+ bloom_filter_2 = BloomFilter.load(bloom_filter.dump)
52
+ assert bloom_filter_2.include?("hello world")
6
53
  end
7
54
  end
data/test/test_helper.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rubygems'
2
2
  require 'test/unit'
3
3
  require 'shoulda'
4
+ require 'mocha'
4
5
 
5
6
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
7
  $LOAD_PATH.unshift(File.dirname(__FILE__))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bloom_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arya Asemanfar
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-01-22 00:00:00 -08:00
12
+ date: 2010-01-23 00:00:00 -08:00
13
13
  default_executable: bloom_filter_server
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -22,6 +22,16 @@ dependencies:
22
22
  - !ruby/object:Gem::Version
23
23
  version: "0"
24
24
  version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: mocha
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
25
35
  - !ruby/object:Gem::Dependency
26
36
  name: eventmachine
27
37
  type: :runtime
@@ -40,19 +50,22 @@ extensions: []
40
50
 
41
51
  extra_rdoc_files:
42
52
  - LICENSE
43
- - README.rdoc
53
+ - README.md
44
54
  files:
45
55
  - .document
46
56
  - .gitignore
47
57
  - LICENSE
48
- - README.rdoc
58
+ - README.md
49
59
  - Rakefile
50
60
  - VERSION
51
61
  - bin/bloom_filter_server
62
+ - bloom_filter.gemspec
52
63
  - lib/bloom_filter.rb
53
64
  - lib/bloom_filter/client.rb
54
65
  - lib/bloom_filter/protocol.rb
55
66
  - lib/bloom_filter/server.rb
67
+ - test/bloom_filter_client_test.rb
68
+ - test/bloom_filter_server_test.rb
56
69
  - test/bloom_filter_test.rb
57
70
  - test/test_helper.rb
58
71
  has_rdoc: true
@@ -82,7 +95,9 @@ rubyforge_project:
82
95
  rubygems_version: 1.3.5
83
96
  signing_key:
84
97
  specification_version: 3
85
- summary: A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.
98
+ summary: A simple BloomFilter implementation, usable in-process or as an EventMachine daemon.
86
99
  test_files:
100
+ - test/bloom_filter_client_test.rb
101
+ - test/bloom_filter_server_test.rb
87
102
  - test/bloom_filter_test.rb
88
103
  - test/test_helper.rb
data/README.rdoc DELETED
@@ -1,18 +0,0 @@
1
- = bloom_filter
2
-
3
- Description goes here.
4
-
5
- == Note on Patches/Pull Requests
6
-
7
- * Fork the project.
8
- * Make your feature addition or bug fix.
9
- * Add tests for it. This is important so I don't break it in a
10
- future version unintentionally.
11
- * Commit, do not mess with rakefile, version, or history.
12
- (if you want to have your own version, that is fine but
13
- bump version in a commit by itself I can ignore when I pull)
14
- * Send me a pull request. Bonus points for topic branches.
15
-
16
- == Copyright
17
-
18
- Copyright (c) 2010 Arya Asemanfar. See LICENSE for details.