bloom_filter 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010 Arya Asemanfar
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,18 @@
1
+ = bloom_filter
2
+
3
+ A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but
13
+ bump version in a commit by itself I can ignore when I pull)
14
+ * Send me a pull request. Bonus points for topic branches.
15
+
16
+ == Copyright
17
+
18
+ Copyright (c) 2010 Arya Asemanfar. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
# Build, test and documentation tasks for the bloom_filter gem.
require 'rubygems'
require 'rake'

# Gem packaging tasks come from Jeweler, which is optional: without it the
# remaining tasks (test, rdoc) must still be usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "bloom_filter"
    gem.summary = %Q{A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.}
    gem.description = %Q{A Bloom filter built on CRC32-derived hashes, with an EventMachine server and a TCP client for sharing one filter between processes.}
    gem.email = "misterfunnyarsal@gmail.com"
    gem.homepage = "http://github.com/arya/bloom_filter"
    gem.authors = ["Arya Asemanfar"]
    gem.add_development_dependency "thoughtbot-shoulda"
    gem.add_dependency "eventmachine", ">=0.12.8"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is defined by Jeweler; only depend on it when it exists,
# otherwise `rake test` would abort with an unknown-task error.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Newer Rake versions no longer ship rake/rdoctask; prefer RDoc's own task and
# fall back to the legacy one on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip the trailing newline of the VERSION file so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bloom_filter #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.5.0
@@ -0,0 +1,27 @@
1
#!/usr/bin/env ruby
# Command-line launcher: parses the filter parameters and the listen address,
# then runs an EventMachine server that hosts a single Bloom filter.
require 'rubygems'
require 'optparse'
require 'bloom_filter/server'

options = {:host => "localhost", :port => 4111}

parser = OptionParser.new do |opts|
  opts.banner = "Usage: bloom_filter_server -n N -p P [-i HOST:PORT]"
  opts.on('-n', "--estimated-items N", "Estimated number of elements in the set") do |n|
    options[:n] = n.to_i
  end

  opts.on('-p', "--probability P", "Desired false positive probability") do |p|
    options[:p] = p.to_f
  end

  opts.on("-i", "--interface HOST:PORT", "Host and port to listen on") do |host_port|
    host, port = host_port.split(":", 2)
    # keep the defaults for whichever half was left out
    options[:host] = host unless host.nil? || host.empty?
    options[:port] = port.to_i unless port.nil? || port.empty?
  end
end
parser.parse!

# Both values are needed to size the filter; fail with the usage text instead
# of a NoMethodError on nil further down.
abort parser.help unless options[:n] && options[:p]

EM.run do
  EM.start_server(options[:host], options[:port], BloomFilter.new_server(options[:n], options[:p]))
end
@@ -0,0 +1,51 @@
1
+ require 'zlib'
2
+
3
# A Bloom filter: a fixed-size bit set that answers "possibly present" or
# "definitely absent" for added elements. Bits are stored in an array of
# integers, BITS_PER_FIXNUM bits per array slot, and the k bit positions of an
# element are derived from CRC32 checksums of the element prefixed with 0...k-1.
class BloomFilter
  BITS_PER_FIXNUM = 31 # this can really be more on 64-bit systems
  DUMP_SEPARATOR = "\n"

  # Computes the filter size for an expected load.
  #
  # estimated_elements - expected number of elements (must be positive).
  # probability        - desired false positive probability, 0 < p <= 1.
  #
  # Returns [m, k]: the number of bits and the number of hash functions, each
  # at least 1 so the resulting filter is always usable.
  # Raises ArgumentError when an argument is outside the ranges above.
  def self.optimal_values(estimated_elements, probability)
    unless estimated_elements > 0
      raise ArgumentError, "estimated_elements must be positive (got #{estimated_elements.inspect})"
    end
    unless probability > 0 && probability <= 1
      raise ArgumentError, "probability must be in (0, 1] (got #{probability.inspect})"
    end

    m = -(estimated_elements * Math.log(probability)) / (Math.log(2) ** 2)
    k = 0.7 * (m / estimated_elements) # 0.7 approximates ln(2)
    # Rounding can yield 0 for large probabilities: k == 0 would make include?
    # vacuously true and m == 0 would divide by zero when hashing.
    [[m.round, 1].max, [k.round, 1].max]
  end

  # Rebuilds a filter from the string produced by #dump.
  #
  # Raises ArgumentError when the string does not hold at least m and k, or
  # when #initialize rejects the values.
  def self.load(dumped)
    m, k, *bits = dumped.split(DUMP_SEPARATOR).collect { |v| v.to_i }
    raise ArgumentError, "malformed dump: expected m, k and the bit words" if m.nil? || k.nil?
    new(m, k, bits)
  end

  # m    - number of bits in the filter (must be positive).
  # k    - number of hash functions applied per element.
  # bits - optional pre-populated word array (as restored by .load); it must
  #        hold at least ceil(m / BITS_PER_FIXNUM) words.
  #
  # Raises ArgumentError on a non-positive m, a missing k or a short bits array.
  def initialize(m, k, bits = nil)
    raise ArgumentError, "m must be positive (got #{m.inspect})" unless m && m > 0
    raise ArgumentError, "k is required" if k.nil?

    words = (m.to_f / BITS_PER_FIXNUM).ceil
    if bits && bits.size < words
      raise ArgumentError, "bits holds #{bits.size} words, #{words} needed for m=#{m}"
    end

    @k = k
    @m = m
    @bits = bits || Array.new(words, 0)
  end

  # Marks el as present by setting its k bit positions.
  def add(el)
    @k.times do |i|
      set_bit(Zlib.crc32("#{i}#{el}") % @m)
    end
  end
  alias_method :<<, :add

  # Returns false when el was definitely never added, true when it may have been.
  def include?(el)
    @k.times do |i|
      return false unless bit_set?(Zlib.crc32("#{i}#{el}") % @m)
    end
    true
  end

  # Serializes m, k and every bit word, one decimal number per line.
  def dump
    [@m, @k, *@bits].join(DUMP_SEPARATOR)
  end

  protected

  # Sets bit n (0-based across the whole word array).
  def set_bit(n)
    index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM
    @bits[index] |= 1 << offset
  end

  # Returns true when bit n is set.
  def bit_set?(n)
    index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM
    (@bits[index] & (1 << offset)) > 0
  end
end
@@ -0,0 +1,86 @@
1
+ require 'socket'
2
+ require 'bloom_filter/protocol'
3
+
4
class BloomFilter
  # Blocking TCP client for a BloomFilter::Server.
  #
  # Every request is framed as a 4-byte big-endian length ("N"), followed by a
  # one-digit request type and the request body; replies use the same length
  # prefix. When no reply arrives within the timeout, or there is no usable
  # connection, each call returns a per-method default instead of raising.
  #
  # NOTE(review): after a timeout the late reply stays unread on the socket and
  # would be consumed by the next request; consider calling #reconnect after a
  # timed-out call.
  class Client
    PACK_N = "N"

    # host, port - address of the server.
    # options    - :timeout, seconds to wait for a reply (nil waits indefinitely).
    #
    # A failed connection attempt is not raised; check #connected?.
    def initialize(host, port, options = {})
      @host, @port = host, port
      @timeout = options[:timeout]
      reconnect
    end

    # Adds el to the remote filter. Returns true when the server confirmed it,
    # false otherwise (including timeout and missing connection).
    def add(el)
      request(Protocol::ADD, el.to_s, false) { |response| response == Protocol::TRUE }
    end
    alias_method :<<, :add

    # Returns whether the remote filter may contain el. Defaults to true on
    # timeout or missing connection, so a filter outage never hides an element.
    def include?(el)
      request(Protocol::INCLUDE, el.to_s, true) { |response| response == Protocol::TRUE }
    end

    # Returns the elements of els the remote filter may contain, in order.
    # Defaults to els itself on timeout or missing connection. Elements are
    # joined with Protocol::DEFAULT_SEPARATOR, so they must not contain it.
    def &(els)
      if els.size == 1
        el = els.first
        include?(el) ? [el] : []
      else
        elements = els.collect { |el| el.to_s }.join(Protocol::DEFAULT_SEPARATOR)
        request(Protocol::INCLUDE_MANY, elements, els) do |response|
          # the reply holds one TRUE/FALSE character per requested element
          result = []
          els.size.times do |i|
            result << els[i] if response[i, 1] == Protocol::TRUE
          end
          result
        end
      end
    end

    # Asks the server to write its filter to path (a path on the server side).
    # Returns true on success, false otherwise.
    def dump(path, timeout = nil)
      request(Protocol::DUMP, path, false, timeout) { |response| response == Protocol::TRUE }
    end

    # Asks the server to replace its filter with the dump stored at path.
    # Returns true on success, false otherwise.
    def load(path, timeout = nil)
      request(Protocol::LOAD, path, false, timeout) { |response| response == Protocol::TRUE }
    end

    # Closes the current socket, if any, and opens a new one. Connection
    # errors leave the client disconnected rather than raising.
    def reconnect
      @socket.close if @socket && !@socket.closed?
      @socket = begin
        TCPSocket.new(@host, @port)
      rescue StandardError
        nil
      end
    end

    def connected?
      !!(@socket && !@socket.closed?)
    end

    private

    # Sends one framed request and yields the reply body to the block,
    # returning the block's value. Returns default when disconnected, when the
    # reply times out, or when the server closed the connection.
    def request(type, body, default, timeout = nil)
      return default unless connected?

      payload = "#{type}#{body}"
      # Frame as raw bytes: the length must count bytes, not characters, and
      # the packed header may not be concatenated with non-binary text.
      payload.force_encoding("BINARY") if payload.respond_to?(:force_encoding)
      @socket.write([payload.bytesize].pack(PACK_N) + payload)

      timeout_or_default(default, timeout) do
        response = read_response
        response.nil? ? default : yield(response)
      end
    end

    # Reads one length-prefixed reply; nil when the stream ended early.
    def read_response
      header = @socket.read(4)
      return nil unless header && header.size == 4
      @socket.read(header.unpack(PACK_N).first)
    end

    # Runs the block once the socket is readable, or returns default when the
    # timeout (argument, else the client-wide one) elapses first.
    def timeout_or_default(default, timeout = nil, &block)
      ready = IO.select([@socket], nil, nil, timeout || @timeout)
      if ready
        block.call
      else
        default
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
class BloomFilter
  # Constants of the wire protocol shared by BloomFilter::Client and
  # BloomFilter::Server: reply characters, the multi-element separator and the
  # request type codes (sent as a single decimal digit).
  module Protocol
    # Reply characters, one per answered element.
    FALSE = "0".freeze
    TRUE = "1".freeze

    # Joins the elements of ADD_MANY / INCLUDE_MANY request bodies.
    DEFAULT_SEPARATOR = ",".freeze

    # Request type codes.
    ADD = 0
    INCLUDE = 1
    INCLUDE_MANY = 2
    ADD_MANY = 3
    DUMP = 4
    LOAD = 5
  end
end
@@ -0,0 +1,91 @@
1
+ require 'bloom_filter'
2
+ require 'bloom_filter/protocol'
3
+ require 'eventmachine'
4
+
5
+
6
class BloomFilter
  # Builds a connection class for EM.start_server whose connections all share
  # one new filter sized for n elements at false positive probability p.
  def self.new_server(n, p)
    klass = Class.new(Server)
    klass.filter = BloomFilter.new(*BloomFilter.optimal_values(n, p))
    klass
  end

  # EventMachine connection handler for the length-prefixed protocol:
  # each request is a 4-byte big-endian size followed by that many bytes, the
  # first of which is the request type digit.
  #
  # NOTE(review): DUMP and LOAD use a path received from the peer as-is, so any
  # client can make the server write or read arbitrary files; restrict the
  # listen address or validate paths before exposing this to untrusted peers.
  class Server < EM::Connection
    PACK_N = "N"
    HEADER_SIZE = 4

    class << self
      # The filter shared by every connection of this (sub)class.
      attr_accessor :filter
    end

    def post_init
      @buffer = nil
      @size = nil
    end

    # Accumulates incoming bytes and processes every complete request found,
    # keeping any trailing partial request for the next call.
    def receive_data(data)
      @buffer ||= String.new
      @buffer << data

      loop do
        # a complete header is exactly HEADER_SIZE bytes, hence >=
        if !@size && @buffer.size >= HEADER_SIZE
          @size, @buffer = @buffer[0, HEADER_SIZE].unpack(PACK_N).first, @buffer[HEADER_SIZE..-1]
        end
        break unless @size && @buffer.size >= @size

        request, @buffer = @buffer[0, @size], @buffer[@size..-1]
        @size = nil
        process_request(request)
      end
    end

    # Dispatches one request on its type digit. Every type replies through
    # write_response except ADD_MANY, which sends nothing back; unknown types
    # are ignored.
    def process_request(request)
      request_type, request_body = request[0, 1], request[1..-1]
      case request_type.to_i
      when Protocol::ADD
        self.class.filter.add(request_body)
        write_response(true)
      when Protocol::ADD_MANY
        # TODO: support specifying a delimiter
        request_body.split(Protocol::DEFAULT_SEPARATOR).each { |el| self.class.filter.add(el) }
      when Protocol::INCLUDE
        write_response(self.class.filter.include?(request_body))
      when Protocol::INCLUDE_MANY
        # TODO: support specifying a delimiter
        write_response(request_body.split(Protocol::DEFAULT_SEPARATOR).collect { |el| self.class.filter.include?(el) })
      when Protocol::DUMP
        begin
          File.open(request_body, "w") do |f|
            f.write(self.class.filter.dump)
          end
          write_response(true)
        rescue StandardError
          write_response(false)
        end
      when Protocol::LOAD
        begin
          self.class.filter = BloomFilter.load(File.read(request_body))
          write_response(true)
        rescue StandardError
          write_response(false)
        end
      end
    end

    # Sends a length-prefixed reply holding one TRUE/FALSE character per value
    # in response (a single boolean or an array of booleans).
    def write_response(response)
      str = Array(response).collect { |t| t ? Protocol::TRUE : Protocol::FALSE }.join
      send_data([str.size].pack(PACK_N) + str)
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'test_helper'
2
+
3
# Unit tests for the in-process BloomFilter.
class BloomFilterTest < Test::Unit::TestCase
  context "a filter" do
    setup do
      @filter = BloomFilter.new(1000, 5)
    end

    should "not include anything while empty" do
      assert !@filter.include?("foo")
    end

    should "include an element once it has been added" do
      @filter.add("foo")
      assert @filter.include?("foo")
    end

    should "survive a dump and load round trip" do
      @filter.add("foo")
      restored = BloomFilter.load(@filter.dump)
      assert restored.include?("foo")
      assert_equal @filter.dump, restored.dump
    end
  end

  should "derive the bit count and hash count from the expected load" do
    assert_equal [9585, 7], BloomFilter.optimal_values(1000, 0.01)
  end
end
@@ -0,0 +1,10 @@
1
# Shared setup for the test suite: loads the test libraries and puts the
# library and test directories on the load path before requiring the gem.
require 'rubygems'
require 'test/unit'
require 'shoulda'

test_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(test_dir, '..', 'lib'))
$LOAD_PATH.unshift(test_dir)
require 'bloom_filter'

# Reopened without additions; a place for helpers shared by all test cases.
class Test::Unit::TestCase
end
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bloom_filter
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Arya Asemanfar
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-22 00:00:00 -08:00
13
+ default_executable: bloom_filter_server
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: eventmachine
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.12.8
34
+ version:
35
+ description: ""
36
+ email: misterfunnyarsal@gmail.com
37
+ executables:
38
+ - bloom_filter_server
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - bin/bloom_filter_server
52
+ - lib/bloom_filter.rb
53
+ - lib/bloom_filter/client.rb
54
+ - lib/bloom_filter/protocol.rb
55
+ - lib/bloom_filter/server.rb
56
+ - test/bloom_filter_test.rb
57
+ - test/test_helper.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/arya/bloom_filter
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: "0"
72
+ version:
73
+ required_rubygems_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: "0"
78
+ version:
79
+ requirements: []
80
+
81
+ rubyforge_project:
82
+ rubygems_version: 1.3.5
83
+ signing_key:
84
+ specification_version: 3
85
+ summary: A simple Ruby BloomFilter implementation, usable in-process or in a client-server model.
86
+ test_files:
87
+ - test/bloom_filter_test.rb
88
+ - test/test_helper.rb