ingramj-bitarray 0.4.2 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -0
- data/TODO +2 -2
- data/VERSION +1 -1
- data/bitarray.gemspec +7 -3
- data/examples/bloomfilter.rb +83 -0
- data/examples/boolnet.rb +70 -0
- data/ext/bitarray.c +129 -12
- data/test/bm.rb +4 -0
- data/test/test.rb +21 -1
- metadata +6 -2
data/README
CHANGED
@@ -9,11 +9,17 @@ Example usage:
|
|
9
9
|
bm[5] = 1
|
10
10
|
bm.toggle_all_bits
|
11
11
|
puts bm
|
12
|
+
|
13
|
+
bm = BitArray.new("10010110")
|
14
|
+
bm.toggle_bit 3
|
15
|
+
bm.clear_bit 0
|
12
16
|
|
13
17
|
|
14
18
|
The test/ directory has a unit test file. It also has a benchmark utility for
|
15
19
|
comparison with Peter Cooper's pure Ruby BitField class.
|
16
20
|
|
21
|
+
The examples/ directory has a bloom filter dictionary-lookup demonstration.
|
22
|
+
|
17
23
|
This library has been compiled and tested on:
|
18
24
|
|
19
25
|
ruby 1.9.1p129 (2009-05-12 revision 23412) [i686-linux] (Ubuntu from src)
|
data/TODO
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
* Comment code. I'd like for it to be useful as a tutorial for extension
|
2
2
|
writing, especially with regard to implementing new types.
|
3
|
-
*
|
4
|
-
|
3
|
+
* BitArray intersection and union ( & and | )
|
4
|
+
* In-place enumerator methods (map!, reverse!, etc.)
|
5
5
|
* Maybe allow resizing.
|
6
6
|
* Write more tests
|
7
7
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/bitarray.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{bitarray}
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.5.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["James E. Ingram"]
|
9
|
-
s.date = %q{2009-
|
9
|
+
s.date = %q{2009-06-01}
|
10
10
|
s.description = %q{A bit array class for Ruby, implemented as a C extension. Includes methods for setting and clearing individual bits, and all bits at once. Also has the standard array access methods, [] and []=, and it mixes in Enumerable.}
|
11
11
|
s.email = %q{ingramj@gmail.com}
|
12
12
|
s.extensions = ["ext/extconf.rb"]
|
@@ -22,6 +22,8 @@ Gem::Specification.new do |s|
|
|
22
22
|
"TODO",
|
23
23
|
"VERSION",
|
24
24
|
"bitarray.gemspec",
|
25
|
+
"examples/bloomfilter.rb",
|
26
|
+
"examples/boolnet.rb",
|
25
27
|
"ext/bitarray.c",
|
26
28
|
"ext/extconf.rb",
|
27
29
|
"test/bitfield.rb",
|
@@ -38,7 +40,9 @@ Gem::Specification.new do |s|
|
|
38
40
|
s.test_files = [
|
39
41
|
"test/bitfield.rb",
|
40
42
|
"test/test.rb",
|
41
|
-
"test/bm.rb"
|
43
|
+
"test/bm.rb",
|
44
|
+
"examples/bloomfilter.rb",
|
45
|
+
"examples/boolnet.rb"
|
42
46
|
]
|
43
47
|
|
44
48
|
if s.respond_to? :specification_version then
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'digest/sha1'
|
3
|
+
require 'bitarray'
|
4
|
+
|
5
|
+
# This bloom filter was written as a demonstration of the BitArray class.
|
6
|
+
# Therefore, it was written with an eye towards simplicity rather than
|
7
|
+
# efficiency or optimization.
|
8
|
+
#
|
9
|
+
# For information about picking parameters for a Bloom Filter, take a look at
|
10
|
+
# the Wikipedia page.
|
11
|
+
#
|
12
|
+
# http://en.wikipedia.org/wiki/Bloom_filter
|
13
|
+
#
|
14
|
+
class BloomFilter
|
15
|
+
def initialize(m = 1000000, k = 3)
|
16
|
+
@size = m
|
17
|
+
@hashes = k < 3 ? 3 : k
|
18
|
+
@ba = BitArray.new(@size)
|
19
|
+
end
|
20
|
+
|
21
|
+
def add(input)
|
22
|
+
hash(input).each {|i|
|
23
|
+
@ba.set_bit i
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
def include?(input)
|
28
|
+
hash(input).each {|i|
|
29
|
+
return false if @ba[i] == 0
|
30
|
+
}
|
31
|
+
return true
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
# Return an array of @hashes indices to set.
|
36
|
+
#
|
37
|
+
# We generate as many hash values as needed by using the technique described
|
38
|
+
# by Kirsch and Mitzenmacher[1].
|
39
|
+
#
|
40
|
+
# [1] http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
|
41
|
+
def hash(input)
|
42
|
+
h1 = input.hash.abs % @size
|
43
|
+
h2 = Digest::SHA1.hexdigest(input).to_i(16) % @size
|
44
|
+
|
45
|
+
ha = [h1, h2]
|
46
|
+
1.upto(@hashes - 2) do |i|
|
47
|
+
ha << (h1 + i * h2) % @size
|
48
|
+
end
|
49
|
+
return ha
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
# As a demonstration, load the contents of the dictionary file and let the user
|
55
|
+
# look up words.
|
56
|
+
#
|
57
|
+
# Using the calculator at http://hur.st/bloomfilter, the optimum number of bits
|
58
|
+
# for my system's dictionary file (98569 words), with a false positive rate of
|
59
|
+
# 0.001, is 1,417,185 bits (about 173 KB), and 10 hash functions.
|
60
|
+
#
|
61
|
+
# Loading the dictionary takes about 4 seconds on my system.
|
62
|
+
if __FILE__ == $0
|
63
|
+
print "Loading dictionary..."
|
64
|
+
bf = BloomFilter.new(1417185, 10)
|
65
|
+
File.open('/usr/share/dict/words') {|f|
|
66
|
+
f.each_line {|w| bf.add(w.chomp)}
|
67
|
+
}
|
68
|
+
print "done\n"
|
69
|
+
|
70
|
+
puts "Enter words to look up, ctrl-d to quit."
|
71
|
+
done = false
|
72
|
+
while (!done)
|
73
|
+
print "Word: "
|
74
|
+
s = gets
|
75
|
+
if s
|
76
|
+
puts "In dictionary: #{bf.include? s.chomp}"
|
77
|
+
else
|
78
|
+
done = true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
puts
|
82
|
+
end
|
83
|
+
|
data/examples/boolnet.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bitarray'
|
4
|
+
|
5
|
+
# A Random Boolean Network. Each bit of the network has two "neighbor" bits and
|
6
|
+
# an operation, all chosen at random. At each step, every bit is set to a new
|
7
|
+
# value by applying its operation to its neighbors.
|
8
|
+
#
|
9
|
+
# All such networks eventually fall into a cyclic or fixed-point attractor.
|
10
|
+
#
|
11
|
+
# See http://en.wikipedia.org/wiki/Boolean_network for more information.
|
12
|
+
class BoolNet
|
13
|
+
attr_reader :state, :size
|
14
|
+
def initialize(size = 80)
|
15
|
+
@size = size
|
16
|
+
# We use two arrays; one for the current state, and one holding information
|
17
|
+
# used to update the state.
|
18
|
+
@state = random_network(size)
|
19
|
+
@update = random_update(size)
|
20
|
+
end
|
21
|
+
|
22
|
+
def step
|
23
|
+
old_state = @state.clone
|
24
|
+
@update.each_with_index { |u,i|
|
25
|
+
case u[0]
|
26
|
+
when :and
|
27
|
+
@state[i] = old_state[u[1]] & old_state[u[2]]
|
28
|
+
when :or
|
29
|
+
@state[i] = old_state[u[1]] | old_state[u[2]]
|
30
|
+
when :xor
|
31
|
+
@state[i] = old_state[u[1]] ^ old_state[u[2]]
|
32
|
+
end
|
33
|
+
}
|
34
|
+
return @state
|
35
|
+
end
|
36
|
+
|
37
|
+
def run(steps = 23)
|
38
|
+
puts state
|
39
|
+
steps.times {
|
40
|
+
puts step
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def random_network(size)
|
46
|
+
ba = BitArray.new(size)
|
47
|
+
0.upto(size - 1) {|b|
|
48
|
+
if (rand(2) == 1)
|
49
|
+
ba.set_bit b
|
50
|
+
end
|
51
|
+
}
|
52
|
+
return ba
|
53
|
+
end
|
54
|
+
|
55
|
+
def random_update(size)
|
56
|
+
# The update array is an array of [op, n1, n2] elements. op is the symbol
|
57
|
+
# specifying which operation to use. n1 and n2 are indices of our neighbors
|
58
|
+
# in the state array.
|
59
|
+
update = Array.new(size)
|
60
|
+
0.upto(size - 1) {|u|
|
61
|
+
update[u] = [[:and, :or, :xor][rand(3)], rand(size), rand(size)]
|
62
|
+
}
|
63
|
+
return update
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
if __FILE__ == $0
|
68
|
+
BoolNet.new.run
|
69
|
+
end
|
70
|
+
|
data/ext/bitarray.c
CHANGED
@@ -172,33 +172,150 @@ rb_bitarray_alloc(VALUE klass)
|
|
172
172
|
}
|
173
173
|
|
174
174
|
|
175
|
-
/*
|
176
|
-
* BitArray.new(size)
|
177
|
-
*
|
178
|
-
* Return a new BitArray or the specified size.
|
179
|
-
*/
|
175
|
+
/* Create a new BitArray from a string. Called by rb_bitarray_initialize. */
|
180
176
|
static VALUE
|
181
|
-
|
177
|
+
rb_bitarray_from_string(VALUE self, VALUE arg)
|
182
178
|
{
|
183
179
|
struct bit_array *ba;
|
184
180
|
Data_Get_Struct(self, struct bit_array, ba);
|
185
181
|
|
186
|
-
|
187
|
-
|
182
|
+
/* Extract a C-string from arg. */
|
183
|
+
long str_len = RSTRING_LEN(arg) + 1;
|
184
|
+
char cstr[str_len];
|
185
|
+
strncpy(cstr, StringValueCStr(arg), str_len);
|
186
|
+
|
187
|
+
/* If the string doesn't begin with a '1' or '0', return an empty
|
188
|
+
* BitArray.
|
189
|
+
*/
|
190
|
+
if (cstr[0] != '0' && cstr[0] != '1') {
|
188
191
|
ba->bits = 0;
|
189
192
|
ba->array_size = 0;
|
190
193
|
return self;
|
191
194
|
}
|
192
|
-
long array_size = ((bits - 1) / UINT_BITS) + 1;
|
193
195
|
|
194
|
-
|
195
|
-
|
196
|
-
|
196
|
+
/* Otherwise, loop through the string and truncate it at the first invalid
|
197
|
+
* character.
|
198
|
+
*/
|
199
|
+
long i;
|
200
|
+
for (i = 0; i < str_len; i++) {
|
201
|
+
if (cstr[i] != '0' && cstr[i] != '1') {
|
202
|
+
cstr[i] = '\0';
|
203
|
+
break;
|
204
|
+
}
|
205
|
+
}
|
206
|
+
|
207
|
+
/* Set up the BitArray structure. */
|
208
|
+
ba->bits = strlen(cstr);
|
209
|
+
ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
|
210
|
+
ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
|
211
|
+
|
212
|
+
/* Initialize the bit array with the string. */
|
213
|
+
for (i = 0; i < ba->bits; i++) {
|
214
|
+
if (cstr[i] == '0') {
|
215
|
+
clear_bit(ba, i);
|
216
|
+
} else {
|
217
|
+
set_bit(ba, i);
|
218
|
+
}
|
219
|
+
}
|
220
|
+
|
221
|
+
return self;
|
222
|
+
}
|
223
|
+
|
224
|
+
|
225
|
+
/* Create a new BitArray from an Array. Called by rb_bitarray_initialize */
|
226
|
+
static VALUE
|
227
|
+
rb_bitarray_from_array(VALUE self, VALUE arg)
|
228
|
+
{
|
229
|
+
struct bit_array *ba;
|
230
|
+
Data_Get_Struct(self, struct bit_array, ba);
|
231
|
+
|
232
|
+
ba->bits = RARRAY_LEN(arg);
|
233
|
+
ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
|
234
|
+
ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
|
235
|
+
|
236
|
+
VALUE e;
|
237
|
+
long i;
|
238
|
+
for (i = 0; i < ba->bits; i++) {
|
239
|
+
e = rb_ary_entry(arg, i);
|
240
|
+
|
241
|
+
switch (TYPE(e)) {
|
242
|
+
case T_FIXNUM: /* fixnums and bignums treated the same. */
|
243
|
+
case T_BIGNUM:
|
244
|
+
NUM2LONG(e) == 0l ? clear_bit(ba, i) : set_bit(ba, i);
|
245
|
+
break;
|
246
|
+
case T_FALSE: /* false and nil treated the same. */
|
247
|
+
case T_NIL:
|
248
|
+
clear_bit(ba, i);
|
249
|
+
break;
|
250
|
+
default:
|
251
|
+
set_bit(ba, i);
|
252
|
+
}
|
253
|
+
}
|
197
254
|
|
198
255
|
return self;
|
199
256
|
}
|
200
257
|
|
201
258
|
|
259
|
+
/* call-seq:
|
260
|
+
* BitArray.new(size)
|
261
|
+
* BitArray.new(string)
|
262
|
+
* BitArray.new(array)
|
263
|
+
*
|
264
|
+
* When called with a size, creates a new BitArray of the specified size, with
|
265
|
+
* all bits cleared. When called with a string or an array, creates a new
|
266
|
+
* BitArray from the argument.
|
267
|
+
*
|
268
|
+
* If a string is given, it should consist of ones and zeroes. If there are
|
269
|
+
* any other characters in the string, the first invalid character and all
|
270
|
+
* following characters will be ignored.
|
271
|
+
*
|
272
|
+
* b = BitArray.new("10101010") => 10101010
|
273
|
+
* b = BitArray.new("1010abcd") => 1010
|
274
|
+
* b = BitArray.new("abcd") =>
|
275
|
+
*
|
276
|
+
* If an array is given, the BitArray is initialized from its elements using
|
277
|
+
* the following rules:
|
278
|
+
*
|
279
|
+
* 1. 0, false, or nil => 0
|
280
|
+
* 2. anything else => 1
|
281
|
+
*
|
282
|
+
* Note that the 0 is a number, not a string. "Anything else" means strings,
|
283
|
+
* symbols, non-zero numbers, subarrays, etc.
|
284
|
+
*
|
285
|
+
* b = BitArray.new([0,0,0,1,1,0]) => 000110
|
286
|
+
* b = BitArray.new([false, true, false]) => 010
|
287
|
+
* b = BitArray.new([:a, :b, :c, [:d, :e]]) => 1111
|
288
|
+
*/
|
289
|
+
static VALUE
|
290
|
+
rb_bitarray_initialize(VALUE self, VALUE arg)
|
291
|
+
{
|
292
|
+
if (TYPE(arg) == T_FIXNUM || TYPE(arg) == T_BIGNUM) {
|
293
|
+
struct bit_array *ba;
|
294
|
+
Data_Get_Struct(self, struct bit_array, ba);
|
295
|
+
|
296
|
+
long bits = NUM2LONG(arg);
|
297
|
+
if (bits <= 0) {
|
298
|
+
ba->bits = 0;
|
299
|
+
ba->array_size = 0;
|
300
|
+
return self;
|
301
|
+
}
|
302
|
+
|
303
|
+
ba->bits = bits;
|
304
|
+
ba->array_size = ((bits - 1) / UINT_BITS) + 1;
|
305
|
+
ba->array = ruby_xcalloc(ba->array_size, UINT_BYTES);
|
306
|
+
|
307
|
+
return self;
|
308
|
+
|
309
|
+
} else if (TYPE(arg) == T_STRING) {
|
310
|
+
return rb_bitarray_from_string(self, arg);
|
311
|
+
} else if (TYPE(arg) == T_ARRAY) {
|
312
|
+
return rb_bitarray_from_array(self, arg);
|
313
|
+
} else {
|
314
|
+
rb_raise(rb_eArgError, "must be size, string, or array");
|
315
|
+
}
|
316
|
+
}
|
317
|
+
|
318
|
+
|
202
319
|
/* call-seq:
|
203
320
|
* bitarray.clone -> a_bitarray
|
204
321
|
* bitarray.dup -> a_bitarray
|
data/test/bm.rb
CHANGED
@@ -6,6 +6,10 @@ Benchmark.bm(28) { |bm|
|
|
6
6
|
puts "---------------------------- Object instantiation (10,000 iterations)"
|
7
7
|
bm.report("BitField initialize") { 10000.times { BitField.new(256) } }
|
8
8
|
bm.report("BitArray initialize") { 10000.times { BitArray.new(256) } }
|
9
|
+
s = "0"*256
|
10
|
+
bm.report("BitArray init from string") { 10000.times { BitArray.new(s) } }
|
11
|
+
a = [0]*256
|
12
|
+
bm.report("BitArray init from array") { 10000.times { BitArray.new(a) } }
|
9
13
|
|
10
14
|
bf = BitField.new(256)
|
11
15
|
ba = BitArray.new(256)
|
data/test/test.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# BitArray Unit Tests.
|
2
|
+
# Originally modified from Peter Cooper's BitField test file.
|
2
3
|
# http://snippets.dzone.com/posts/show/4234
|
3
4
|
require "test/unit"
|
4
5
|
require "bitarray"
|
@@ -140,5 +141,24 @@ class TestLibraryFileName < Test::Unit::TestCase
|
|
140
141
|
ba[5] = 1
|
141
142
|
assert_equal [0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0], ba.to_a
|
142
143
|
end
|
144
|
+
|
145
|
+
def test_init_from_str
|
146
|
+
ba = BitArray.new("00011")
|
147
|
+
assert_equal "00011", ba.to_s
|
148
|
+
ba = BitArray.new("00011abcd")
|
149
|
+
assert_equal "00011", ba.to_s
|
150
|
+
ba = BitArray.new("abcd0101")
|
151
|
+
assert_equal "", ba.to_s
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
def test_init_from_array
|
156
|
+
ba = BitArray.new([0,1,1,1,0])
|
157
|
+
assert_equal "01110", ba.to_s
|
158
|
+
ba = BitArray.new([true, true, false, false, true])
|
159
|
+
assert_equal "11001", ba.to_s
|
160
|
+
ba = BitArray.new([nil, nil, :a, nil, [:b, :c]])
|
161
|
+
assert_equal "00101", ba.to_s
|
162
|
+
end
|
143
163
|
end
|
144
164
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingramj-bitarray
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James E. Ingram
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-01 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -30,6 +30,8 @@ files:
|
|
30
30
|
- TODO
|
31
31
|
- VERSION
|
32
32
|
- bitarray.gemspec
|
33
|
+
- examples/bloomfilter.rb
|
34
|
+
- examples/boolnet.rb
|
33
35
|
- ext/bitarray.c
|
34
36
|
- ext/extconf.rb
|
35
37
|
- test/bitfield.rb
|
@@ -69,3 +71,5 @@ test_files:
|
|
69
71
|
- test/bitfield.rb
|
70
72
|
- test/test.rb
|
71
73
|
- test/bm.rb
|
74
|
+
- examples/bloomfilter.rb
|
75
|
+
- examples/boolnet.rb
|