json-bloomfilter 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +12 -14
- data/coffee/bloomfilter.coffee +30 -9
- data/js/json-bloomfilter.min.js +1 -1
- data/lib/json/bloomfilter.rb +22 -9
- data/lib/json/bloomfilter/version.rb +1 -1
- data/spec/javascripts/bloomfilter_spec.js +13 -0
- data/spec/json/bloomfilter_spec.rb +13 -0
- metadata +3 -3
data/README.md
CHANGED
@@ -16,21 +16,10 @@ All of this while not sending the entire list to the client, which is something
|
|
16
16
|
|
17
17
|
### Ruby
|
18
18
|
|
19
|
-
```shell
|
20
|
-
gem install json-bloomfilter
|
21
|
-
|
22
|
-
# and optionally add
|
23
|
-
require "json/bloomfilter"
|
24
19
|
```
|
25
|
-
|
26
|
-
or
|
27
|
-
|
28
|
-
```ruby
|
29
|
-
gem 'json-bloomfilter'
|
20
|
+
gem install json-bloomfilter
|
30
21
|
```
|
31
22
|
|
32
|
-
in your Gemfile
|
33
|
-
|
34
23
|
### Javascript
|
35
24
|
|
36
25
|
With the gem installed run
|
@@ -46,6 +35,8 @@ and the `json-bloomfilter.min.js` will be copied to your local directory. If you
|
|
46
35
|
### Ruby
|
47
36
|
|
48
37
|
```ruby
|
38
|
+
require "json/bloomfilter"
|
39
|
+
|
49
40
|
# use the factory to configure the filter
|
50
41
|
filter = JsonBloomfilter.build 10000, 0.01 # number of expected items, desired error rate
|
51
42
|
|
@@ -55,6 +46,9 @@ filter = JsonBloomfilter.new size: 100
|
|
55
46
|
# and add entries
|
56
47
|
filter.add "foo"
|
57
48
|
filter.add "bar"
|
49
|
+
# alternatively
|
50
|
+
filter.add ["foo", "bar"]
|
51
|
+
# test the entries
|
58
52
|
filter.test "foo" #=> true
|
59
53
|
filter.test "bar" #=> true
|
60
54
|
filter.test "doh" #=> probably false
|
@@ -82,6 +76,9 @@ filter = new JsonBloomfilter({ size: 100 });
|
|
82
76
|
// and add entries
|
83
77
|
filter.add("foo");
|
84
78
|
filter.add("bar");
|
79
|
+
// alternatively
|
80
|
+
filter.add(["foo", "bar"]);
|
81
|
+
// test the entries
|
85
82
|
filter.test("foo"); //=> true
|
86
83
|
filter.test("bar"); //=> true
|
87
84
|
filter.test("doh"); //=> probably false
|
@@ -132,8 +129,9 @@ Additionally you can pass along:
|
|
132
129
|
|
133
130
|
## Release notes
|
134
131
|
|
135
|
-
* **0.1.3** Adds a check for non positive capacity values on build
|
136
|
-
* **0.1.2** Adds Zlib dependency
|
132
|
+
* **0.1.4** `.build` now accepts a list of items in place of the capacity; `add` and `test` also accept an array of keys
|
133
|
+
* **0.1.3** Adds a check for non-positive capacity values on build
|
134
|
+
* **0.1.2** Adds Zlib dependency
|
137
135
|
* **0.1.1** Fixes a JS integer overflow issue and makes Ruby 1.8.7 compatible
|
138
136
|
* **0.1.0** Adds travis-ci. Bumped minor release version
|
139
137
|
* **0.0.6** Adds a factory that takes a size + error rate
|
data/coffee/bloomfilter.coffee
CHANGED
@@ -5,24 +5,45 @@ JsonBloomfilter = (options = {}) ->
|
|
5
5
|
seed: (new Date().getTime() / 1000),
|
6
6
|
bits: null
|
7
7
|
|
8
|
+
items = delete options["items"]
|
8
9
|
@options[key] = value for key, value of options
|
9
10
|
@bits = new JsonBloomfilter.BitArray(@options["size"], @options["bits"])
|
11
|
+
@add(items) if items
|
10
12
|
this
|
11
13
|
|
12
|
-
JsonBloomfilter.build = (
|
14
|
+
JsonBloomfilter.build = (capacity_or_items, error_rate) ->
|
15
|
+
capacity = JsonBloomfilter.capacity_for(capacity_or_items)
|
16
|
+
items = JsonBloomfilter.items_for(capacity_or_items)
|
13
17
|
throw new Error("Capacity needs to be a positive integer") if capacity <= 0
|
18
|
+
new JsonBloomfilter({size: JsonBloomfilter.size_for(capacity, error_rate), hashes: JsonBloomfilter.hashes_for(capacity, error_rate), items: items})
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
20
|
+
JsonBloomfilter.capacity_for = (capacity_or_items) ->
|
21
|
+
if capacity_or_items instanceof Array
|
22
|
+
capacity_or_items.length
|
23
|
+
else
|
24
|
+
capacity_or_items
|
18
25
|
|
19
|
-
JsonBloomfilter.
|
20
|
-
|
26
|
+
JsonBloomfilter.items_for = (capacity_or_items) ->
|
27
|
+
if capacity_or_items instanceof Array
|
28
|
+
capacity_or_items
|
29
|
+
else
|
30
|
+
null
|
31
|
+
|
32
|
+
JsonBloomfilter.size_for = (capacity, error_rate) ->
|
33
|
+
Math.ceil(capacity * Math.log(error_rate) / Math.log(1.0 / Math.pow(2,Math.log(2))))
|
34
|
+
|
35
|
+
JsonBloomfilter.hashes_for = (capacity, error_rate) ->
|
36
|
+
Math.round(Math.log(2) * @size_for(capacity, error_rate) / capacity)
|
37
|
+
|
38
|
+
JsonBloomfilter.prototype.add = (keys) ->
|
39
|
+
for key in [].concat(keys)
|
40
|
+
@bits.add(index) for index in @indexesFor(key)
|
21
41
|
return
|
22
42
|
|
23
|
-
JsonBloomfilter.prototype.test = (
|
24
|
-
for
|
25
|
-
|
43
|
+
JsonBloomfilter.prototype.test = (keys) ->
|
44
|
+
for key in [].concat(keys)
|
45
|
+
for index in @indexesFor(key)
|
46
|
+
return false if @bits.get(index) == 0
|
26
47
|
true
|
27
48
|
|
28
49
|
JsonBloomfilter.prototype.clear = ->
|
data/js/json-bloomfilter.min.js
CHANGED
@@ -1 +1 @@
|
|
1
|
-
var JsonBloomfilter;JsonBloomfilter=function(options){var key,value;if(options==null){options={}}this.options={size:100,hashes:4,seed:new Date().getTime()/1000,bits:null};for(key in options){value=options[key];this.options[key]=value}this.bits=new JsonBloomfilter.BitArray(this.options.size,this.options.bits);return this};JsonBloomfilter.build=function(
|
1
|
+
var JsonBloomfilter;JsonBloomfilter=function(options){var items,key,value;if(options==null){options={}}this.options={size:100,hashes:4,seed:new Date().getTime()/1000,bits:null};items=delete options.items;for(key in options){value=options[key];this.options[key]=value}this.bits=new JsonBloomfilter.BitArray(this.options.size,this.options.bits);if(items){this.add(items)}return this};JsonBloomfilter.build=function(capacity_or_items,error_rate){var capacity,items;capacity=JsonBloomfilter.capacity_for(capacity_or_items);items=JsonBloomfilter.items_for(capacity_or_items);if(capacity<=0){throw new Error("Capacity needs to be a positive integer")}return new JsonBloomfilter({size:JsonBloomfilter.size_for(capacity,error_rate),hashes:JsonBloomfilter.hashes_for(capacity,error_rate),items:items})};JsonBloomfilter.capacity_for=function(capacity_or_items){if(capacity_or_items instanceof Array){return capacity_or_items.length}else{return capacity_or_items}};JsonBloomfilter.items_for=function(capacity_or_items){if(capacity_or_items instanceof Array){return capacity_or_items}else{return null}};JsonBloomfilter.size_for=function(capacity,error_rate){return Math.ceil(capacity*Math.log(error_rate)/Math.log(1/Math.pow(2,Math.log(2))))};JsonBloomfilter.hashes_for=function(capacity,error_rate){return Math.round(Math.log(2)*this.size_for(capacity,error_rate)/capacity)};JsonBloomfilter.prototype.add=function(keys){var index,key,_i,_j,_len,_len1,_ref,_ref1;_ref=[].concat(keys);for(_i=0,_len=_ref.length;_i<_len;_i++){key=_ref[_i];_ref1=this.indexesFor(key);for(_j=0,_len1=_ref1.length;_j<_len1;_j++){index=_ref1[_j];this.bits.add(index)}}};JsonBloomfilter.prototype.test=function(keys){var index,key,_i,_j,_len,_len1,_ref,_ref1;_ref=[].concat(keys);for(_i=0,_len=_ref.length;_i<_len;_i++){key=_ref[_i];_ref1=this.indexesFor(key);for(_j=0,_len1=_ref1.length;_j<_len1;_j++){index=_ref1[_j];if(this.bits.get(index)===0){return false}}}return true};JsonBloomfilter.prototype.clear=function(){this.bits=new 
JsonBloomfilter.BitArray(this.options.size)};JsonBloomfilter.prototype.toHash=function(){var hash,key,value,_ref;hash={};_ref=this.options;for(key in _ref){value=_ref[key];hash[key]=value}hash.bits=this.bits.field;return hash};JsonBloomfilter.prototype.toJson=function(){return JSON.stringify(this.toHash())};JsonBloomfilter.prototype.indexesFor=function(key){var index,indexes,_i,_ref;indexes=[];for(index=_i=0,_ref=this.options.hashes-1;0<=_ref?_i<=_ref:_i>=_ref;index=0<=_ref?++_i:--_i){indexes.push(JsonBloomfilter.Zlib.crc32(""+key+":"+(index+this.options.seed))%this.options.size)}return indexes};JsonBloomfilter.BitArray=function(size,field){var arrayLength,i,_i,_ref;if(field==null){field=null}if(!size){throw new Error("Missing argument: size")}this.ELEMENT_WIDTH=32;this.size=size;this.field=field||[];arrayLength=Math.floor(((size-1)/this.ELEMENT_WIDTH)+1);if(!field){for(i=_i=0,_ref=arrayLength-1;0<=_ref?_i<=_ref:_i>=_ref;i=0<=_ref?++_i:--_i){this.field[i]=0}}return this};JsonBloomfilter.BitArray.prototype.add=function(position){return this.set(position,1)};JsonBloomfilter.BitArray.prototype.remove=function(position){return this.set(position,0)};JsonBloomfilter.BitArray.prototype.set=function(position,value){var aPos,bChange;if(position>=this.size){throw new Error("BitArray index out of bounds")}aPos=this.arrayPosition(position);bChange=this.bitChange(position);if(value===1){this.field[aPos]=this.abs(this.field[aPos]|bChange)}else{if((this.field[aPos]&bChange)!==0){this.field[aPos]=this.abs(this.field[aPos]^bChange)}}return true};JsonBloomfilter.BitArray.prototype.get=function(position){var aPos,bChange;if(position>=this.size){throw new Error("BitArray index out of bounds")}aPos=this.arrayPosition(position);bChange=this.bitChange(position);if(this.abs(this.field[aPos]&bChange)>0){return 1}else{return 0}};JsonBloomfilter.BitArray.prototype.arrayPosition=function(position){return 
Math.floor(position/this.ELEMENT_WIDTH)};JsonBloomfilter.BitArray.prototype.bitChange=function(position){return this.abs(1<<position%this.ELEMENT_WIDTH)};JsonBloomfilter.BitArray.prototype.abs=function(val){if(val<0){val+=4294967295}return val};JsonBloomfilter.BitArray.prototype.toString=function(){var i,output,_i,_ref;output="";for(i=_i=0,_ref=this.size-1;0<=_ref?_i<=_ref:_i>=_ref;i=0<=_ref?++_i:--_i){output+=this.get(i)}return output};JsonBloomfilter.Zlib={CRC32_TABLE:new Array(0,1996959894,3993919788,2567524794,124634137,1886057615,3915621685,2657392035,249268274,2044508324,3772115230,2547177864,162941995,2125561021,3887607047,2428444049,498536548,1789927666,4089016648,2227061214,450548861,1843258603,4107580753,2211677639,325883990,1684777152,4251122042,2321926636,335633487,1661365465,4195302755,2366115317,997073096,1281953886,3579855332,2724688242,1006888145,1258607687,3524101629,2768942443,901097722,1119000684,3686517206,2898065728,853044451,1172266101,3705015759,2882616665,651767980,1373503546,3369554304,3218104598,565507253,1454621731,3485111705,3099436303,671266974,1594198024,3322730930,2970347812,795835527,1483230225,3244367275,3060149565,1994146192,31158534,2563907772,4023717930,1907459465,112637215,2680153253,3904427059,2013776290,251722036,2517215374,3775830040,2137656763,141376813,2439277719,3865271297,1802195444,476864866,2238001368,4066508878,1812370925,453092731,2181625025,4111451223,1706088902,314042704,2344532202,4240017532,1658658271,366619977,2362670323,4224994405,1303535960,984961486,2747007092,3569037538,1256170817,1037604311,2765210733,3554079995,1131014506,879679996,2909243462,3663771856,1141124467,855842277,2852801631,3708648649,1342533948,654459306,3188396048,3373015174,1466479909,544179635,3110523913,3462522015,1591671054,702138776,2966460450,3352799412,1504918807,783551873,3082640443,3233442989,3988292384,2596254646,62317068,1957810842,3939845945,2647816111,81470997,1943803523,3814918930,2489596804,225274430,2053790376,3826175755,24669060
13,167816743,2097651377,4027552580,2265490386,503444072,1762050814,4150417245,2154129355,426522225,1852507879,4275313526,2312317920,282753626,1742555852,4189708143,2394877945,397917763,1622183637,3604390888,2714866558,953729732,1340076626,3518719985,2797360999,1068828381,1219638859,3624741850,2936675148,906185462,1090812512,3747672003,2825379669,829329135,1181335161,3412177804,3160834842,628085408,1382605366,3423369109,3138078467,570562233,1426400815,3317316542,2998733608,733239954,1555261956,3268935591,3050360625,752459403,1541320221,2607071920,3965973030,1969922972,40735498,2617837225,3943577151,1913087877,83908371,2512341634,3803740692,2075208622,213261112,2463272603,3855990285,2094854071,198958881,2262029012,4057260610,1759359992,534414190,2176718541,4139329115,1873836001,414664567,2282248934,4279200368,1711684554,285281116,2405801727,4167216745,1634467795,376229701,2685067896,3608007406,1308918612,956543938,2808555105,3495958263,1231636301,1047427035,2932959818,3654703836,1088359270,936918000,2847714899,3736837829,1202900863,817233897,3183342108,3401237130,1404277552,615818150,3134207493,3453421203,1423857449,601450431,3009837614,3294710456,1567103746,711928724,3020668471,3272380065,1510334235,755167117),crc32:function(string){var bytes,crc,i,iTop,n;bytes=this.bytesFor(string);crc=0;n=0;crc=crc^(-1);i=0;iTop=bytes.length;while(i<iTop){n=(crc^bytes[i])&255;crc=(crc>>>8)^this.CRC32_TABLE[n];i++}crc=crc^(-1);if(crc<0){crc+=4294967296}return crc},bytesFor:function(string){var bytes,i;bytes=[];i=0;while(i<string.length){bytes.push(string.charCodeAt(i));++i}return bytes}};
|
data/lib/json/bloomfilter.rb
CHANGED
@@ -5,26 +5,39 @@ require "zlib"
|
|
5
5
|
class JsonBloomfilter
|
6
6
|
DEFAULTS = { "size" => 100, "hashes" => 4, "seed" => Time.new.to_i, "bits" => nil }
|
7
7
|
|
8
|
-
def self.build
|
8
|
+
def self.build capacity_or_items, error_rate
|
9
|
+
capacity, items = capacity_or_items.is_a?(Array) ? [capacity_or_items.length, capacity_or_items] : [capacity_or_items, nil]
|
9
10
|
raise ArgumentError.new("Capacity needs to be a positive integer") if capacity <= 0
|
10
|
-
size
|
11
|
-
|
12
|
-
|
11
|
+
JsonBloomfilter.new :size => size_for(capacity, error_rate), :hashes => hashes_for(capacity, error_rate), items: items
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.size_for capacity, error_rate
|
15
|
+
(capacity * Math.log(error_rate) / Math.log(1.0 / 2**Math.log(2))).ceil
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.hashes_for capacity, error_rate
|
19
|
+
(Math.log(2) * size_for(capacity, error_rate) / capacity).round
|
13
20
|
end
|
14
21
|
|
15
22
|
def initialize options = {}
|
23
|
+
items = options.delete("items")
|
16
24
|
@options = merge_defaults_with options
|
17
25
|
@bits = BitArray.new(@options["size"], @options["bits"])
|
26
|
+
add(items) if items
|
18
27
|
end
|
19
28
|
|
20
|
-
def add
|
21
|
-
|
29
|
+
def add keys
|
30
|
+
[keys].flatten.each do |key|
|
31
|
+
indexes_for(key).each { |index| @bits.add(index) }
|
32
|
+
end
|
22
33
|
nil
|
23
34
|
end
|
24
35
|
|
25
|
-
def test
|
26
|
-
|
27
|
-
|
36
|
+
def test keys
|
37
|
+
[keys].flatten.each do |key|
|
38
|
+
indexes_for(key).each do |index|
|
39
|
+
return false if @bits.get(index) == 0
|
40
|
+
end
|
28
41
|
end
|
29
42
|
true
|
30
43
|
end
|
@@ -7,6 +7,12 @@ describe("JsonBloomfilter", function() {
|
|
7
7
|
expect(bf.toHash()["size"]).toBe(9586);
|
8
8
|
});
|
9
9
|
|
10
|
+
it("should optionally take an array of strings instead of a capacity", function() {
|
11
|
+
bf = JsonBloomfilter.build(["foo", "bar"], 0.01);
|
12
|
+
expect(bf.toHash()["hashes"]).toBe(7);
|
13
|
+
expect(bf.toHash()["size"]).toBe(20);
|
14
|
+
});
|
15
|
+
|
10
16
|
it("should require a positive integer capacity", function() {
|
11
17
|
expect(function(){new JsonBloomfilter.build(0, 0.01)}).toThrow("Capacity needs to be a positive integer")
|
12
18
|
});
|
@@ -50,6 +56,13 @@ describe("JsonBloomfilter", function() {
|
|
50
56
|
expect(bf.test("foo")).toBe(true);
|
51
57
|
});
|
52
58
|
|
59
|
+
it("should be able to add and test more than one key at a time", function() {
|
60
|
+
expect(bf.test("foo")).toBe(false);
|
61
|
+
expect(bf.test("bar")).toBe(false);
|
62
|
+
bf.add(["foo", "bar"]);
|
63
|
+
expect(bf.test(["foo", "bar"])).toBe(true);
|
64
|
+
});
|
65
|
+
|
53
66
|
it("should not change anything if added twice", function() {
|
54
67
|
expect(bf.test("foobar")).toBe(true);
|
55
68
|
bits = bf.toHash()["bits"];
|
@@ -9,6 +9,12 @@ describe JsonBloomfilter do
|
|
9
9
|
expect(bf.to_hash["size"]).to be == 9586
|
10
10
|
end
|
11
11
|
|
12
|
+
it "should optionally take an array of strings instead of a capacity" do
|
13
|
+
bf = JsonBloomfilter.build ["foo", "bar"], 0.01
|
14
|
+
expect(bf.to_hash["hashes"]).to be == 7
|
15
|
+
expect(bf.to_hash["size"]).to be == 20
|
16
|
+
end
|
17
|
+
|
12
18
|
it "should require a capacity of > 0" do
|
13
19
|
expect(lambda{JsonBloomfilter.build 0, 0.01}).to raise_error(ArgumentError)
|
14
20
|
end
|
@@ -44,6 +50,13 @@ describe JsonBloomfilter do
|
|
44
50
|
expect(@bf.test "foo").to be_true
|
45
51
|
end
|
46
52
|
|
53
|
+
it "should be able to add and test more than one key at a time" do
|
54
|
+
expect(@bf.test "foo").to be_false
|
55
|
+
expect(@bf.test "bar").to be_false
|
56
|
+
@bf.add ["foo", "bar"]
|
57
|
+
expect(@bf.test ["foo", "bar"]).to be_true
|
58
|
+
end
|
59
|
+
|
47
60
|
it "should not change anything if added twice" do
|
48
61
|
expect(@bf.test "foobar").to be_true
|
49
62
|
bits = @bf.to_hash["bits"]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json-bloomfilter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-01-
|
12
|
+
date: 2013-01-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
@@ -210,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
210
210
|
version: '0'
|
211
211
|
segments:
|
212
212
|
- 0
|
213
|
-
hash:
|
213
|
+
hash: 762970087584583546
|
214
214
|
requirements: []
|
215
215
|
rubyforge_project:
|
216
216
|
rubygems_version: 1.8.24
|