qbloom_filter 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +1 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +36 -0
- data/README.md +45 -0
- data/Rakefile +2 -0
- data/bin/bundle +114 -0
- data/bin/console +14 -0
- data/bin/htmldiff +29 -0
- data/bin/ldiff +29 -0
- data/bin/rake +29 -0
- data/bin/rspec +29 -0
- data/bin/setup +8 -0
- data/bloom_filter.gemspec +29 -0
- data/lib/bloom_filter.rb +63 -0
- data/lib/bloom_filter/version.rb +3 -0
- metadata +63 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c8384171d7c3a67af223b2425c08ec9fa8ccf4c784d658f5951b37642708a384
|
4
|
+
data.tar.gz: a50a3260fcd55e0284e85a1b78f4f34dce3f5ff9bf4a56add2cdebc3aecd1d07
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9fe8aeae6469952e99be203cbe4c12cf03317b7eb9499a2bde069cf11c980d3f7746d29fcb2853451ae82ff5d244f87491103bd15d9125e445f44bb63bbabbfb
|
7
|
+
data.tar.gz: 4a43600e93388a498797dcb6706801b105c470602e0449da6bd37faf885ea660f17c3a9a5fcbfede9f1860af9c0cc8fc47a960073ba893e523fca13d4a8e6590
|
data/.gitignore
ADDED
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--require spec_helper
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
qbloom_filter (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
bitset (1.2.0)
|
10
|
+
diff-lcs (1.4.4)
|
11
|
+
rake (12.3.3)
|
12
|
+
rspec (3.9.0)
|
13
|
+
rspec-core (~> 3.9.0)
|
14
|
+
rspec-expectations (~> 3.9.0)
|
15
|
+
rspec-mocks (~> 3.9.0)
|
16
|
+
rspec-core (3.9.2)
|
17
|
+
rspec-support (~> 3.9.3)
|
18
|
+
rspec-expectations (3.9.2)
|
19
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
20
|
+
rspec-support (~> 3.9.0)
|
21
|
+
rspec-mocks (3.9.1)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.9.0)
|
24
|
+
rspec-support (3.9.3)
|
25
|
+
|
26
|
+
PLATFORMS
|
27
|
+
x64-mingw32
|
28
|
+
|
29
|
+
DEPENDENCIES
|
30
|
+
bitset (~> 1.2)
|
31
|
+
qbloom_filter!
|
32
|
+
rake (~> 12.0)
|
33
|
+
rspec (~> 3.0)
|
34
|
+
|
35
|
+
BUNDLED WITH
|
36
|
+
2.1.4
|
data/README.md
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
# BloomFilter
|
2
|
+
|
3
|
+
A Bloom filter is a space-efficient probabilistic data structure
|
4
|
+
[Wiki](https://en.wikipedia.org/wiki/Bloom_filter)
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'bloom_filter'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle install
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install bloom_filter
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
After installation the instance can be created.
|
24
|
+
And two parameters can be used to describe the bloom filter:
|
25
|
+
|
26
|
+
* first - number of items that are be inserted(default: 100)
|
27
|
+
* second - False Positive probability(default: 0.01)
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
bloom_filter = BloomFilter::Filter.new(1000, 0.001)
|
31
|
+
```
|
32
|
+
|
33
|
+
#### API
|
34
|
+
__add(value)__ - add item into filter
|
35
|
+
|
36
|
+
__includes?(value)__ - check if filter includes the value
|
37
|
+
|
38
|
+
__contains?(value)__ - alias of __includes?(value)__
|
39
|
+
|
40
|
+
__count__ - returns number of inserted items
|
41
|
+
|
42
|
+
## Contributing
|
43
|
+
|
44
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/superedriver/bloom_filter
|
45
|
+
|
data/Rakefile
ADDED
data/bin/bundle
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'bundle' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "rubygems"
|
12
|
+
|
13
|
+
m = Module.new do
|
14
|
+
module_function
|
15
|
+
|
16
|
+
def invoked_as_script?
|
17
|
+
File.expand_path($0) == File.expand_path(__FILE__)
|
18
|
+
end
|
19
|
+
|
20
|
+
def env_var_version
|
21
|
+
ENV["BUNDLER_VERSION"]
|
22
|
+
end
|
23
|
+
|
24
|
+
def cli_arg_version
|
25
|
+
return unless invoked_as_script? # don't want to hijack other binstubs
|
26
|
+
return unless "update".start_with?(ARGV.first || " ") # must be running `bundle update`
|
27
|
+
bundler_version = nil
|
28
|
+
update_index = nil
|
29
|
+
ARGV.each_with_index do |a, i|
|
30
|
+
if update_index && update_index.succ == i && a =~ Gem::Version::ANCHORED_VERSION_PATTERN
|
31
|
+
bundler_version = a
|
32
|
+
end
|
33
|
+
next unless a =~ /\A--bundler(?:[= ](#{Gem::Version::VERSION_PATTERN}))?\z/
|
34
|
+
bundler_version = $1
|
35
|
+
update_index = i
|
36
|
+
end
|
37
|
+
bundler_version
|
38
|
+
end
|
39
|
+
|
40
|
+
def gemfile
|
41
|
+
gemfile = ENV["BUNDLE_GEMFILE"]
|
42
|
+
return gemfile if gemfile && !gemfile.empty?
|
43
|
+
|
44
|
+
File.expand_path("../../Gemfile", __FILE__)
|
45
|
+
end
|
46
|
+
|
47
|
+
def lockfile
|
48
|
+
lockfile =
|
49
|
+
case File.basename(gemfile)
|
50
|
+
when "gems.rb" then gemfile.sub(/\.rb$/, gemfile)
|
51
|
+
else "#{gemfile}.lock"
|
52
|
+
end
|
53
|
+
File.expand_path(lockfile)
|
54
|
+
end
|
55
|
+
|
56
|
+
def lockfile_version
|
57
|
+
return unless File.file?(lockfile)
|
58
|
+
lockfile_contents = File.read(lockfile)
|
59
|
+
return unless lockfile_contents =~ /\n\nBUNDLED WITH\n\s{2,}(#{Gem::Version::VERSION_PATTERN})\n/
|
60
|
+
Regexp.last_match(1)
|
61
|
+
end
|
62
|
+
|
63
|
+
def bundler_version
|
64
|
+
@bundler_version ||=
|
65
|
+
env_var_version || cli_arg_version ||
|
66
|
+
lockfile_version
|
67
|
+
end
|
68
|
+
|
69
|
+
def bundler_requirement
|
70
|
+
return "#{Gem::Requirement.default}.a" unless bundler_version
|
71
|
+
|
72
|
+
bundler_gem_version = Gem::Version.new(bundler_version)
|
73
|
+
|
74
|
+
requirement = bundler_gem_version.approximate_recommendation
|
75
|
+
|
76
|
+
return requirement unless Gem::Version.new(Gem::VERSION) < Gem::Version.new("2.7.0")
|
77
|
+
|
78
|
+
requirement += ".a" if bundler_gem_version.prerelease?
|
79
|
+
|
80
|
+
requirement
|
81
|
+
end
|
82
|
+
|
83
|
+
def load_bundler!
|
84
|
+
ENV["BUNDLE_GEMFILE"] ||= gemfile
|
85
|
+
|
86
|
+
activate_bundler
|
87
|
+
end
|
88
|
+
|
89
|
+
def activate_bundler
|
90
|
+
gem_error = activation_error_handling do
|
91
|
+
gem "bundler", bundler_requirement
|
92
|
+
end
|
93
|
+
return if gem_error.nil?
|
94
|
+
require_error = activation_error_handling do
|
95
|
+
require "bundler/version"
|
96
|
+
end
|
97
|
+
return if require_error.nil? && Gem::Requirement.new(bundler_requirement).satisfied_by?(Gem::Version.new(Bundler::VERSION))
|
98
|
+
warn "Activating bundler (#{bundler_requirement}) failed:\n#{gem_error.message}\n\nTo install the version of bundler this project requires, run `gem install bundler -v '#{bundler_requirement}'`"
|
99
|
+
exit 42
|
100
|
+
end
|
101
|
+
|
102
|
+
def activation_error_handling
|
103
|
+
yield
|
104
|
+
nil
|
105
|
+
rescue StandardError, LoadError => e
|
106
|
+
e
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
m.load_bundler!
|
111
|
+
|
112
|
+
if m.invoked_as_script?
|
113
|
+
load Gem.bin_path("bundler", "bundle")
|
114
|
+
end
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "bloom_filter"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/htmldiff
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'htmldiff' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("diff-lcs", "htmldiff")
|
data/bin/ldiff
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'ldiff' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("diff-lcs", "ldiff")
|
data/bin/rake
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rake' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rake", "rake")
|
data/bin/rspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# This file was generated by Bundler.
|
6
|
+
#
|
7
|
+
# The application 'rspec' is installed as part of a gem, and
|
8
|
+
# this file is here to facilitate running it.
|
9
|
+
#
|
10
|
+
|
11
|
+
require "pathname"
|
12
|
+
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../../Gemfile",
|
13
|
+
Pathname.new(__FILE__).realpath)
|
14
|
+
|
15
|
+
bundle_binstub = File.expand_path("../bundle", __FILE__)
|
16
|
+
|
17
|
+
if File.file?(bundle_binstub)
|
18
|
+
if File.read(bundle_binstub, 300) =~ /This file was generated by Bundler/
|
19
|
+
load(bundle_binstub)
|
20
|
+
else
|
21
|
+
abort("Your `bin/bundle` was not generated by Bundler, so this binstub cannot run.
|
22
|
+
Replace `bin/bundle` by running `bundle binstubs bundler --force`, then run this command again.")
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
require "rubygems"
|
27
|
+
require "bundler/setup"
|
28
|
+
|
29
|
+
load Gem.bin_path("rspec-core", "rspec")
|
data/bin/setup
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require_relative 'lib/bloom_filter/version'
|
2
|
+
|
3
|
+
Gem::Specification.new do |spec|
|
4
|
+
spec.name = "qbloom_filter"
|
5
|
+
spec.version = BloomFilter::VERSION
|
6
|
+
spec.authors = ["qaz"]
|
7
|
+
spec.email = ["qaz@qaz.qaz"]
|
8
|
+
|
9
|
+
spec.licenses = ['MIT']
|
10
|
+
spec.summary = %q{Bloom Filter}
|
11
|
+
spec.description = %q{Simple Bloom Filter}
|
12
|
+
spec.homepage = "https://github.com/superedriver/bloom-filter"
|
13
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
|
14
|
+
|
15
|
+
spec.metadata["allowed_push_host"] = "https://rubygems.org"
|
16
|
+
|
17
|
+
spec.metadata["homepage_uri"] = spec.homepage
|
18
|
+
spec.metadata["source_code_uri"] = "https://github.com/superedriver/bloom-filter"
|
19
|
+
spec.metadata["changelog_uri"] = "https://github.com/superedriver/bloom-filter"
|
20
|
+
|
21
|
+
# Specify which files should be added to the gem when it is released.
|
22
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
23
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
24
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
|
+
end
|
26
|
+
spec.bindir = "exe"
|
27
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
|
+
spec.require_paths = ["lib"]
|
29
|
+
end
|
data/lib/bloom_filter.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
require "bloom_filter/version"
|
2
|
+
require "bitset"
|
3
|
+
require 'digest/md5'
|
4
|
+
|
5
|
+
module BloomFilter
|
6
|
+
PRIME = 100_000_000_003
|
7
|
+
MAX_HASH_PARAM = 1000
|
8
|
+
class Filter
|
9
|
+
attr_reader :count
|
10
|
+
|
11
|
+
def initialize(capacity = 100, probability = 0.01)
|
12
|
+
# amount of inserted elements
|
13
|
+
@count = 0
|
14
|
+
|
15
|
+
#number of bits in the array
|
16
|
+
@m = (-(capacity * Math.log(probability)) / (Math.log(2) ** 2)).ceil
|
17
|
+
|
18
|
+
@bitset = Bitset.new(@m)
|
19
|
+
|
20
|
+
#number of hash functions that minimizes the probability of false positives
|
21
|
+
@k = (Math.log(2) * (@m / capacity)).ceil
|
22
|
+
|
23
|
+
# a, b params for hash functions
|
24
|
+
@hash_params = []
|
25
|
+
@k.times { @hash_params.push([rand(1000), rand(1000)]) }
|
26
|
+
end
|
27
|
+
|
28
|
+
def add(value)
|
29
|
+
x = get_hash(value)
|
30
|
+
was_inserted = true
|
31
|
+
@k.times do |i|
|
32
|
+
a, b = @hash_params[i]
|
33
|
+
position = get_position(a, b, x)
|
34
|
+
was_inserted = false unless @bitset[position]
|
35
|
+
@bitset[position] = true
|
36
|
+
end
|
37
|
+
@count += 1 unless was_inserted
|
38
|
+
value
|
39
|
+
end
|
40
|
+
|
41
|
+
def contains?(value)
|
42
|
+
x = get_hash(value)
|
43
|
+
result = true
|
44
|
+
@k.times do |i|
|
45
|
+
a, b = @hash_params[i]
|
46
|
+
result = false unless @bitset[get_position(a, b, x)]
|
47
|
+
end
|
48
|
+
|
49
|
+
result
|
50
|
+
end
|
51
|
+
alias :includes? :contains?
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
def get_position(a, b, val)
|
56
|
+
((a * val + b) % PRIME) % @m
|
57
|
+
end
|
58
|
+
|
59
|
+
def get_hash(value)
|
60
|
+
Digest::MD5.hexdigest(value.to_s).to_i(16)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: qbloom_filter
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- qaz
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-09-27 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Simple Bloom Filter
|
14
|
+
email:
|
15
|
+
- qaz@qaz.qaz
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- ".gitignore"
|
21
|
+
- ".rspec"
|
22
|
+
- Gemfile
|
23
|
+
- Gemfile.lock
|
24
|
+
- README.md
|
25
|
+
- Rakefile
|
26
|
+
- bin/bundle
|
27
|
+
- bin/console
|
28
|
+
- bin/htmldiff
|
29
|
+
- bin/ldiff
|
30
|
+
- bin/rake
|
31
|
+
- bin/rspec
|
32
|
+
- bin/setup
|
33
|
+
- bloom_filter.gemspec
|
34
|
+
- lib/bloom_filter.rb
|
35
|
+
- lib/bloom_filter/version.rb
|
36
|
+
homepage: https://github.com/superedriver/bloom-filter
|
37
|
+
licenses:
|
38
|
+
- MIT
|
39
|
+
metadata:
|
40
|
+
allowed_push_host: https://rubygems.org
|
41
|
+
homepage_uri: https://github.com/superedriver/bloom-filter
|
42
|
+
source_code_uri: https://github.com/superedriver/bloom-filter
|
43
|
+
changelog_uri: https://github.com/superedriver/bloom-filter
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 2.3.0
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubygems_version: 3.0.3
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: Bloom Filter
|
63
|
+
test_files: []
|