json-bloomfilter 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +12 -14
- data/coffee/bloomfilter.coffee +30 -9
- data/js/json-bloomfilter.min.js +1 -1
- data/lib/json/bloomfilter.rb +22 -9
- data/lib/json/bloomfilter/version.rb +1 -1
- data/spec/javascripts/bloomfilter_spec.js +13 -0
- data/spec/json/bloomfilter_spec.rb +13 -0
- metadata +3 -3
data/README.md
CHANGED
@@ -16,21 +16,10 @@ All of this while not sending the entire list to the client, which is something
|
|
16
16
|
|
17
17
|
### Ruby
|
18
18
|
|
19
|
-
```shell
|
20
|
-
gem install json-bloomfilter
|
21
|
-
|
22
|
-
# and optionally add
|
23
|
-
require "json/bloomfilter"
|
24
19
|
```
|
25
|
-
|
26
|
-
or
|
27
|
-
|
28
|
-
```ruby
|
29
|
-
gem 'json-bloomfilter'
|
20
|
+
gem install json-bloomfilter
|
30
21
|
```
|
31
22
|
|
32
|
-
in your Gemfile
|
33
|
-
|
34
23
|
### Javascript
|
35
24
|
|
36
25
|
With the gem installed run
|
@@ -46,6 +35,8 @@ and the `json-bloomfilter.min.js` will be copied to your local directory. If you
|
|
46
35
|
### Ruby
|
47
36
|
|
48
37
|
```ruby
|
38
|
+
require "json/bloomfilter"
|
39
|
+
|
49
40
|
# use the factory to configure the filter
|
50
41
|
filter = JsonBloomfilter.build 10000, 0.01 # number of expected items, desired error rate
|
51
42
|
|
@@ -55,6 +46,9 @@ filter = JsonBloomFilter.new size: 100
|
|
55
46
|
# and add entries
|
56
47
|
filter.add "foo"
|
57
48
|
filter.add "bar"
|
49
|
+
# alternatively, add several entries at once
|
50
|
+
filter.add ["foo", "bar"]
|
51
|
+
# test the entries
|
58
52
|
filter.test "foo" #=> true
|
59
53
|
filter.test "bar" #=> true
|
60
54
|
filter.test "doh" #=> probably false
|
@@ -82,6 +76,9 @@ filter = new JsonBloomFilter({ size: 100 });
|
|
82
76
|
// and add entries
|
83
77
|
filter.add("foo");
|
84
78
|
filter.add("bar");
|
79
|
+
// alternatively, add several entries at once
|
80
|
+
filter.add(["foo", "bar"]);
|
81
|
+
// test the entries
|
85
82
|
filter.test("foo"); //=> true
|
86
83
|
filter.test("bar"); //=> true
|
87
84
|
filter.test("doh"); //=> probably false
|
@@ -132,8 +129,9 @@ Additionally you can pass along:
|
|
132
129
|
|
133
130
|
## Release notes
|
134
131
|
|
135
|
-
* **0.1.
|
136
|
-
* **0.1.
|
132
|
+
* **0.1.4** Changed the .build function to optionally take a list of items in place of a capacity
|
133
|
+
* **0.1.3** Adds a check for non positive capacity values on build
|
134
|
+
* **0.1.2** Adds Zlib dependency
|
137
135
|
* **0.1.1** Fixes a JS integer overflow issue and makes Ruby 1.8.7 compatible
|
138
136
|
* **0.1.0** Adds travis-ci. Bumped minor release version
|
139
137
|
* **0.0.6** Adds a factory that takes a size + error rate
|
data/coffee/bloomfilter.coffee
CHANGED
@@ -5,24 +5,45 @@ JsonBloomfilter = (options = {}) ->
|
|
5
5
|
seed: (new Date().getTime() / 1000),
|
6
6
|
bits: null
|
7
7
|
|
8
|
+
items = delete options["items"]
|
8
9
|
@options[key] = value for key, value of options
|
9
10
|
@bits = new JsonBloomfilter.BitArray(@options["size"], @options["bits"])
|
11
|
+
@add(items) if items
|
10
12
|
this
|
11
13
|
|
12
|
-
JsonBloomfilter.build = (
|
14
|
+
JsonBloomfilter.build = (capacity_or_items, error_rate) ->
|
15
|
+
capacity = JsonBloomfilter.capacity_for(capacity_or_items)
|
16
|
+
items = JsonBloomfilter.items_for(capacity_or_items)
|
13
17
|
throw new Error("Capacity needs to be a positive integer") if capacity <= 0
|
18
|
+
new JsonBloomfilter({size: JsonBloomfilter.size_for(capacity, error_rate), hashes: JsonBloomfilter.hashes_for(capacity, error_rate), items: items})
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
20
|
+
JsonBloomfilter.capacity_for = (capacity_or_items) ->
|
21
|
+
if capacity_or_items instanceof Array
|
22
|
+
capacity_or_items.length
|
23
|
+
else
|
24
|
+
capacity_or_items
|
18
25
|
|
19
|
-
JsonBloomfilter.
|
20
|
-
|
26
|
+
JsonBloomfilter.items_for = (capacity_or_items) ->
|
27
|
+
if capacity_or_items instanceof Array
|
28
|
+
capacity_or_items
|
29
|
+
else
|
30
|
+
null
|
31
|
+
|
32
|
+
JsonBloomfilter.size_for = (capacity, error_rate) ->
|
33
|
+
Math.ceil(capacity * Math.log(error_rate) / Math.log(1.0 / Math.pow(2,Math.log(2))))
|
34
|
+
|
35
|
+
JsonBloomfilter.hashes_for = (capacity, error_rate) ->
|
36
|
+
Math.round(Math.log(2) * @size_for(capacity, error_rate) / capacity)
|
37
|
+
|
38
|
+
JsonBloomfilter.prototype.add = (keys) ->
|
39
|
+
for key in [].concat(keys)
|
40
|
+
@bits.add(index) for index in @indexesFor(key)
|
21
41
|
return
|
22
42
|
|
23
|
-
JsonBloomfilter.prototype.test = (
|
24
|
-
for
|
25
|
-
|
43
|
+
JsonBloomfilter.prototype.test = (keys) ->
|
44
|
+
for key in [].concat(keys)
|
45
|
+
for index in @indexesFor(key)
|
46
|
+
return false if @bits.get(index) == 0
|
26
47
|
true
|
27
48
|
|
28
49
|
JsonBloomfilter.prototype.clear = ->
|
data/js/json-bloomfilter.min.js
CHANGED
@@ -1 +1 @@
|
|
1
|
-
var JsonBloomfilter;JsonBloomfilter=function(options){var key,value;if(options==null){options={}}this.options={size:100,hashes:4,seed:new Date().getTime()/1000,bits:null};for(key in options){value=options[key];this.options[key]=value}this.bits=new JsonBloomfilter.BitArray(this.options.size,this.options.bits);return this};JsonBloomfilter.build=function(
|
1
|
+
var JsonBloomfilter;JsonBloomfilter=function(options){var items,key,value;if(options==null){options={}}this.options={size:100,hashes:4,seed:new Date().getTime()/1000,bits:null};items=delete options.items;for(key in options){value=options[key];this.options[key]=value}this.bits=new JsonBloomfilter.BitArray(this.options.size,this.options.bits);if(items){this.add(items)}return this};JsonBloomfilter.build=function(capacity_or_items,error_rate){var capacity,items;capacity=JsonBloomfilter.capacity_for(capacity_or_items);items=JsonBloomfilter.items_for(capacity_or_items);if(capacity<=0){throw new Error("Capacity needs to be a positive integer")}return new JsonBloomfilter({size:JsonBloomfilter.size_for(capacity,error_rate),hashes:JsonBloomfilter.hashes_for(capacity,error_rate),items:items})};JsonBloomfilter.capacity_for=function(capacity_or_items){if(capacity_or_items instanceof Array){return capacity_or_items.length}else{return capacity_or_items}};JsonBloomfilter.items_for=function(capacity_or_items){if(capacity_or_items instanceof Array){return capacity_or_items}else{return null}};JsonBloomfilter.size_for=function(capacity,error_rate){return Math.ceil(capacity*Math.log(error_rate)/Math.log(1/Math.pow(2,Math.log(2))))};JsonBloomfilter.hashes_for=function(capacity,error_rate){return Math.round(Math.log(2)*this.size_for(capacity,error_rate)/capacity)};JsonBloomfilter.prototype.add=function(keys){var index,key,_i,_j,_len,_len1,_ref,_ref1;_ref=[].concat(keys);for(_i=0,_len=_ref.length;_i<_len;_i++){key=_ref[_i];_ref1=this.indexesFor(key);for(_j=0,_len1=_ref1.length;_j<_len1;_j++){index=_ref1[_j];this.bits.add(index)}}};JsonBloomfilter.prototype.test=function(keys){var index,key,_i,_j,_len,_len1,_ref,_ref1;_ref=[].concat(keys);for(_i=0,_len=_ref.length;_i<_len;_i++){key=_ref[_i];_ref1=this.indexesFor(key);for(_j=0,_len1=_ref1.length;_j<_len1;_j++){index=_ref1[_j];if(this.bits.get(index)===0){return false}}}return true};JsonBloomfilter.prototype.clear=function(){this.bits=new 
JsonBloomfilter.BitArray(this.options.size)};JsonBloomfilter.prototype.toHash=function(){var hash,key,value,_ref;hash={};_ref=this.options;for(key in _ref){value=_ref[key];hash[key]=value}hash.bits=this.bits.field;return hash};JsonBloomfilter.prototype.toJson=function(){return JSON.stringify(this.toHash())};JsonBloomfilter.prototype.indexesFor=function(key){var index,indexes,_i,_ref;indexes=[];for(index=_i=0,_ref=this.options.hashes-1;0<=_ref?_i<=_ref:_i>=_ref;index=0<=_ref?++_i:--_i){indexes.push(JsonBloomfilter.Zlib.crc32(""+key+":"+(index+this.options.seed))%this.options.size)}return indexes};JsonBloomfilter.BitArray=function(size,field){var arrayLength,i,_i,_ref;if(field==null){field=null}if(!size){throw new Error("Missing argument: size")}this.ELEMENT_WIDTH=32;this.size=size;this.field=field||[];arrayLength=Math.floor(((size-1)/this.ELEMENT_WIDTH)+1);if(!field){for(i=_i=0,_ref=arrayLength-1;0<=_ref?_i<=_ref:_i>=_ref;i=0<=_ref?++_i:--_i){this.field[i]=0}}return this};JsonBloomfilter.BitArray.prototype.add=function(position){return this.set(position,1)};JsonBloomfilter.BitArray.prototype.remove=function(position){return this.set(position,0)};JsonBloomfilter.BitArray.prototype.set=function(position,value){var aPos,bChange;if(position>=this.size){throw new Error("BitArray index out of bounds")}aPos=this.arrayPosition(position);bChange=this.bitChange(position);if(value===1){this.field[aPos]=this.abs(this.field[aPos]|bChange)}else{if((this.field[aPos]&bChange)!==0){this.field[aPos]=this.abs(this.field[aPos]^bChange)}}return true};JsonBloomfilter.BitArray.prototype.get=function(position){var aPos,bChange;if(position>=this.size){throw new Error("BitArray index out of bounds")}aPos=this.arrayPosition(position);bChange=this.bitChange(position);if(this.abs(this.field[aPos]&bChange)>0){return 1}else{return 0}};JsonBloomfilter.BitArray.prototype.arrayPosition=function(position){return 
Math.floor(position/this.ELEMENT_WIDTH)};JsonBloomfilter.BitArray.prototype.bitChange=function(position){return this.abs(1<<position%this.ELEMENT_WIDTH)};JsonBloomfilter.BitArray.prototype.abs=function(val){if(val<0){val+=4294967295}return val};JsonBloomfilter.BitArray.prototype.toString=function(){var i,output,_i,_ref;output="";for(i=_i=0,_ref=this.size-1;0<=_ref?_i<=_ref:_i>=_ref;i=0<=_ref?++_i:--_i){output+=this.get(i)}return output};JsonBloomfilter.Zlib={CRC32_TABLE:new Array(0,1996959894,3993919788,2567524794,124634137,1886057615,3915621685,2657392035,249268274,2044508324,3772115230,2547177864,162941995,2125561021,3887607047,2428444049,498536548,1789927666,4089016648,2227061214,450548861,1843258603,4107580753,2211677639,325883990,1684777152,4251122042,2321926636,335633487,1661365465,4195302755,2366115317,997073096,1281953886,3579855332,2724688242,1006888145,1258607687,3524101629,2768942443,901097722,1119000684,3686517206,2898065728,853044451,1172266101,3705015759,2882616665,651767980,1373503546,3369554304,3218104598,565507253,1454621731,3485111705,3099436303,671266974,1594198024,3322730930,2970347812,795835527,1483230225,3244367275,3060149565,1994146192,31158534,2563907772,4023717930,1907459465,112637215,2680153253,3904427059,2013776290,251722036,2517215374,3775830040,2137656763,141376813,2439277719,3865271297,1802195444,476864866,2238001368,4066508878,1812370925,453092731,2181625025,4111451223,1706088902,314042704,2344532202,4240017532,1658658271,366619977,2362670323,4224994405,1303535960,984961486,2747007092,3569037538,1256170817,1037604311,2765210733,3554079995,1131014506,879679996,2909243462,3663771856,1141124467,855842277,2852801631,3708648649,1342533948,654459306,3188396048,3373015174,1466479909,544179635,3110523913,3462522015,1591671054,702138776,2966460450,3352799412,1504918807,783551873,3082640443,3233442989,3988292384,2596254646,62317068,1957810842,3939845945,2647816111,81470997,1943803523,3814918930,2489596804,225274430,2053790376,3826175755,24669060
13,167816743,2097651377,4027552580,2265490386,503444072,1762050814,4150417245,2154129355,426522225,1852507879,4275313526,2312317920,282753626,1742555852,4189708143,2394877945,397917763,1622183637,3604390888,2714866558,953729732,1340076626,3518719985,2797360999,1068828381,1219638859,3624741850,2936675148,906185462,1090812512,3747672003,2825379669,829329135,1181335161,3412177804,3160834842,628085408,1382605366,3423369109,3138078467,570562233,1426400815,3317316542,2998733608,733239954,1555261956,3268935591,3050360625,752459403,1541320221,2607071920,3965973030,1969922972,40735498,2617837225,3943577151,1913087877,83908371,2512341634,3803740692,2075208622,213261112,2463272603,3855990285,2094854071,198958881,2262029012,4057260610,1759359992,534414190,2176718541,4139329115,1873836001,414664567,2282248934,4279200368,1711684554,285281116,2405801727,4167216745,1634467795,376229701,2685067896,3608007406,1308918612,956543938,2808555105,3495958263,1231636301,1047427035,2932959818,3654703836,1088359270,936918000,2847714899,3736837829,1202900863,817233897,3183342108,3401237130,1404277552,615818150,3134207493,3453421203,1423857449,601450431,3009837614,3294710456,1567103746,711928724,3020668471,3272380065,1510334235,755167117),crc32:function(string){var bytes,crc,i,iTop,n;bytes=this.bytesFor(string);crc=0;n=0;crc=crc^(-1);i=0;iTop=bytes.length;while(i<iTop){n=(crc^bytes[i])&255;crc=(crc>>>8)^this.CRC32_TABLE[n];i++}crc=crc^(-1);if(crc<0){crc+=4294967296}return crc},bytesFor:function(string){var bytes,i;bytes=[];i=0;while(i<string.length){bytes.push(string.charCodeAt(i));++i}return bytes}};
|
data/lib/json/bloomfilter.rb
CHANGED
@@ -5,26 +5,39 @@ require "zlib"
|
|
5
5
|
class JsonBloomfilter
|
6
6
|
DEFAULTS = { "size" => 100, "hashes" => 4, "seed" => Time.new.to_i, "bits" => nil }
|
7
7
|
|
8
|
-
def self.build
|
8
|
+
def self.build capacity_or_items, error_rate
|
9
|
+
capacity, items = capacity_or_items.is_a?(Array) ? [capacity_or_items.length, capacity_or_items] : [capacity_or_items, nil]
|
9
10
|
raise ArgumentError.new("Capacity needs to be a positive integer") if capacity <= 0
|
10
|
-
size
|
11
|
-
|
12
|
-
|
11
|
+
JsonBloomfilter.new :size => size_for(capacity, error_rate), :hashes => hashes_for(capacity, error_rate), items: items
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.size_for capacity, error_rate
|
15
|
+
(capacity * Math.log(error_rate) / Math.log(1.0 / 2**Math.log(2))).ceil
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.hashes_for capacity, error_rate
|
19
|
+
(Math.log(2) * size_for(capacity, error_rate) / capacity).round
|
13
20
|
end
|
14
21
|
|
15
22
|
def initialize options = {}
|
23
|
+
items = options.delete("items")
|
16
24
|
@options = merge_defaults_with options
|
17
25
|
@bits = BitArray.new(@options["size"], @options["bits"])
|
26
|
+
add(items) if items
|
18
27
|
end
|
19
28
|
|
20
|
-
def add
|
21
|
-
|
29
|
+
def add keys
|
30
|
+
[keys].flatten.each do |key|
|
31
|
+
indexes_for(key).each { |index| @bits.add(index) }
|
32
|
+
end
|
22
33
|
nil
|
23
34
|
end
|
24
35
|
|
25
|
-
def test
|
26
|
-
|
27
|
-
|
36
|
+
def test keys
|
37
|
+
[keys].flatten.each do |key|
|
38
|
+
indexes_for(key).each do |index|
|
39
|
+
return false if @bits.get(index) == 0
|
40
|
+
end
|
28
41
|
end
|
29
42
|
true
|
30
43
|
end
|
@@ -7,6 +7,12 @@ describe("JsonBloomfilter", function() {
|
|
7
7
|
expect(bf.toHash()["size"]).toBe(9586);
|
8
8
|
});
|
9
9
|
|
10
|
+
it("should optionally take an array of strings instead of a capacity", function() {
|
11
|
+
bf = JsonBloomfilter.build(["foo", "bar"], 0.01);
|
12
|
+
expect(bf.toHash()["hashes"]).toBe(7);
|
13
|
+
expect(bf.toHash()["size"]).toBe(20);
|
14
|
+
});
|
15
|
+
|
10
16
|
it("should require a positive integer capacity", function() {
|
11
17
|
expect(function(){new JsonBloomfilter.build(0, 0.01)}).toThrow("Capacity needs to be a positive integer")
|
12
18
|
});
|
@@ -50,6 +56,13 @@ describe("JsonBloomfilter", function() {
|
|
50
56
|
expect(bf.test("foo")).toBe(true);
|
51
57
|
});
|
52
58
|
|
59
|
+
it("should be able to add and test more than one key at a time", function() {
|
60
|
+
expect(bf.test("foo")).toBe(false);
|
61
|
+
expect(bf.test("bar")).toBe(false);
|
62
|
+
bf.add(["foo", "bar"]);
|
63
|
+
expect(bf.test(["foo", "bar"])).toBe(true);
|
64
|
+
});
|
65
|
+
|
53
66
|
it("should not change anything if added twice", function() {
|
54
67
|
expect(bf.test("foobar")).toBe(true);
|
55
68
|
bits = bf.toHash()["bits"];
|
@@ -9,6 +9,12 @@ describe JsonBloomfilter do
|
|
9
9
|
expect(bf.to_hash["size"]).to be == 9586
|
10
10
|
end
|
11
11
|
|
12
|
+
it "should optionally take an array of strings instead of a capacity" do
|
13
|
+
bf = JsonBloomfilter.build ["foo", "bar"], 0.01
|
14
|
+
expect(bf.to_hash["hashes"]).to be == 7
|
15
|
+
expect(bf.to_hash["size"]).to be == 20
|
16
|
+
end
|
17
|
+
|
12
18
|
it "should require a capacity of > 0" do
|
13
19
|
expect(lambda{JsonBloomfilter.build 0, 0.01}).to raise_error(ArgumentError)
|
14
20
|
end
|
@@ -44,6 +50,13 @@ describe JsonBloomfilter do
|
|
44
50
|
expect(@bf.test "foo").to be_true
|
45
51
|
end
|
46
52
|
|
53
|
+
it "should be able to add and test more than one key at a time" do
|
54
|
+
expect(@bf.test "foo").to be_false
|
55
|
+
expect(@bf.test "bar").to be_false
|
56
|
+
@bf.add ["foo", "bar"]
|
57
|
+
expect(@bf.test ["foo", "bar"]).to be_true
|
58
|
+
end
|
59
|
+
|
47
60
|
it "should not change anything if added twice" do
|
48
61
|
expect(@bf.test "foobar").to be_true
|
49
62
|
bits = @bf.to_hash["bits"]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-bloomfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
@@ -210,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
210
210
|
version: '0'
|
211
211
|
segments:
|
212
212
|
- 0
|
213
|
-
hash:
|
213
|
+
hash: 762970087584583546
|
214
214
|
requirements: []
|
215
215
|
rubyforge_project:
|
216
216
|
rubygems_version: 1.8.24
|