ingramj-bitarray 0.4.2 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -0
- data/TODO +2 -2
- data/VERSION +1 -1
- data/bitarray.gemspec +7 -3
- data/examples/bloomfilter.rb +83 -0
- data/examples/boolnet.rb +70 -0
- data/ext/bitarray.c +129 -12
- data/test/bm.rb +4 -0
- data/test/test.rb +21 -1
- metadata +6 -2
data/README
CHANGED
@@ -9,11 +9,17 @@ Example usage:
|
|
9
9
|
bm[5] = 1
|
10
10
|
bm.toggle_all_bits
|
11
11
|
puts bm
|
12
|
+
|
13
|
+
bm = BitArray.new("10010110")
|
14
|
+
bm.toggle_bit 3
|
15
|
+
bm.clear_bit 0
|
12
16
|
|
13
17
|
|
14
18
|
The test/ directory has a unit test file. It also has a benchmark utility for
|
15
19
|
comparison with Peter Cooper's pure Ruby BitField class.
|
16
20
|
|
21
|
+
The examples/ directory has a bloom filter dictionary-lookup demonstration.
|
22
|
+
|
17
23
|
This library has been compiled and tested on:
|
18
24
|
|
19
25
|
ruby 1.9.1p129 (2009-05-12 revision 23412) [i686-linux] (Ubuntu from src)
|
data/TODO
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
* Comment code. I'd like for it to be useful as a tutorial for extension
|
2
2
|
writing, especially with regards to implementing new types.
|
3
|
-
*
|
4
|
-
|
3
|
+
* BitArray intersection and union ( & and | )
|
4
|
+
* In-place enumerator methods (map!, reverse!, etc.)
|
5
5
|
* Maybe allow resizing.
|
6
6
|
* Write more tests
|
7
7
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.5.0
|
data/bitarray.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{bitarray}
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.5.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["James E. Ingram"]
|
9
|
-
s.date = %q{2009-
|
9
|
+
s.date = %q{2009-06-01}
|
10
10
|
s.description = %q{A bit array class for Ruby, implemented as a C extension. Includes methods for setting and clearing individual bits, and all bits at once. Also has the standard array access methods, [] and []=, and it mixes in Enumerable.}
|
11
11
|
s.email = %q{ingramj@gmail.com}
|
12
12
|
s.extensions = ["ext/extconf.rb"]
|
@@ -22,6 +22,8 @@ Gem::Specification.new do |s|
|
|
22
22
|
"TODO",
|
23
23
|
"VERSION",
|
24
24
|
"bitarray.gemspec",
|
25
|
+
"examples/bloomfilter.rb",
|
26
|
+
"examples/boolnet.rb",
|
25
27
|
"ext/bitarray.c",
|
26
28
|
"ext/extconf.rb",
|
27
29
|
"test/bitfield.rb",
|
@@ -38,7 +40,9 @@ Gem::Specification.new do |s|
|
|
38
40
|
s.test_files = [
|
39
41
|
"test/bitfield.rb",
|
40
42
|
"test/test.rb",
|
41
|
-
"test/bm.rb"
|
43
|
+
"test/bm.rb",
|
44
|
+
"examples/bloomfilter.rb",
|
45
|
+
"examples/boolnet.rb"
|
42
46
|
]
|
43
47
|
|
44
48
|
if s.respond_to? :specification_version then
|
@@ -0,0 +1,83 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'digest/sha1'
|
3
|
+
require 'bitarray'
|
4
|
+
|
5
|
+
# This bloom filter was written as a demonstration of the BitArray class.
|
6
|
+
# Therefore, it was written with an eye towards simplicity rather than
|
7
|
+
# efficiency or optimization.
|
8
|
+
#
|
9
|
+
# For information about picking parameters for a Bloom Filter, take a look at
|
10
|
+
# the Wikipedia page.
|
11
|
+
#
|
12
|
+
# http://en.wikipedia.org/wiki/Bloom_filter
|
13
|
+
#
|
14
|
+
class BloomFilter
|
15
|
+
def initialize(m = 1000000, k = 3)
|
16
|
+
@size = m
|
17
|
+
@hashes = k < 3 ? 3 : k
|
18
|
+
@ba = BitArray.new(@size)
|
19
|
+
end
|
20
|
+
|
21
|
+
def add(input)
|
22
|
+
hash(input).each {|i|
|
23
|
+
@ba.set_bit i
|
24
|
+
}
|
25
|
+
end
|
26
|
+
|
27
|
+
def include?(input)
|
28
|
+
hash(input).each {|i|
|
29
|
+
return false if @ba[i] == 0
|
30
|
+
}
|
31
|
+
return true
|
32
|
+
end
|
33
|
+
|
34
|
+
private
|
35
|
+
# Return an array of @hashes indices to set.
|
36
|
+
#
|
37
|
+
# We generate as many hash values as needed by using the technique described
|
38
|
+
# by Kirsch and Mitzenmacher[1].
|
39
|
+
#
|
40
|
+
# [1] http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
|
41
|
+
def hash(input)
|
42
|
+
h1 = input.hash.abs % @size
|
43
|
+
h2 = Digest::SHA1.hexdigest(input).to_i(16) % @size
|
44
|
+
|
45
|
+
ha = [h1, h2]
|
46
|
+
1.upto(@hashes - 2) do |i|
|
47
|
+
ha << (h1 + i * h2) % @size
|
48
|
+
end
|
49
|
+
return ha
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
# As a demonstration, load the contents of the dictionary file and let the user
|
55
|
+
# look up words.
|
56
|
+
#
|
57
|
+
# Using the calculator at http://hur.st/bloomfilter, the optimum number of bits
|
58
|
+
# for my system's dictionary file (98569 words), with a false positive rate of
|
59
|
+
# 0.001, is 1,417,185 bits (about 173 KB), and 10 hash functions.
|
60
|
+
#
|
61
|
+
# Loading the dictionary takes about 4 seconds on my system.
|
62
|
+
if __FILE__ == $0
|
63
|
+
print "Loading dictionary..."
|
64
|
+
bf = BloomFilter.new(1417185, 10)
|
65
|
+
File.open('/usr/share/dict/words') {|f|
|
66
|
+
f.each_line {|w| bf.add(w.chomp)}
|
67
|
+
}
|
68
|
+
print "done\n"
|
69
|
+
|
70
|
+
puts "Enter words to look up, ctrl-d to quit."
|
71
|
+
done = false
|
72
|
+
while (!done)
|
73
|
+
print "Word: "
|
74
|
+
s = gets
|
75
|
+
if s
|
76
|
+
puts "In dictionary: #{bf.include? s.chomp}"
|
77
|
+
else
|
78
|
+
done = true
|
79
|
+
end
|
80
|
+
end
|
81
|
+
puts
|
82
|
+
end
|
83
|
+
|
data/examples/boolnet.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bitarray'
|
4
|
+
|
5
|
+
# A Random Boolean Network. Each bit of the network has two "neighbor" bits and
|
6
|
+
# an operation, all chosen at random. At each step, every bit is set to a new
|
7
|
+
# value by applying its operation to its neighbors.
|
8
|
+
#
|
9
|
+
# All such networks eventually fall into a cyclic or fixed-point attractor.
|
10
|
+
#
|
11
|
+
# See http://en.wikipedia.org/wiki/Boolean_network for more information.
|
12
|
+
class BoolNet
|
13
|
+
attr_reader :state, :size
|
14
|
+
def initialize(size = 80)
|
15
|
+
@size = size
|
16
|
+
# We use two arrays; one for the current state, and one holding information
|
17
|
+
# used to update the state.
|
18
|
+
@state = random_network(size)
|
19
|
+
@update = random_update(size)
|
20
|
+
end
|
21
|
+
|
22
|
+
def step
|
23
|
+
old_state = @state.clone
|
24
|
+
@update.each_with_index { |u,i|
|
25
|
+
case u[0]
|
26
|
+
when :and
|
27
|
+
@state[i] = old_state[u[1]] & old_state[u[2]]
|
28
|
+
when :or
|
29
|
+
@state[i] = old_state[u[1]] | old_state[u[2]]
|
30
|
+
when :xor
|
31
|
+
@state[i] = old_state[u[1]] ^ old_state[u[2]]
|
32
|
+
end
|
33
|
+
}
|
34
|
+
return @state
|
35
|
+
end
|
36
|
+
|
37
|
+
def run(steps = 23)
|
38
|
+
puts state
|
39
|
+
steps.times {
|
40
|
+
puts step
|
41
|
+
}
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
def random_network(size)
|
46
|
+
ba = BitArray.new(size)
|
47
|
+
0.upto(size - 1) {|b|
|
48
|
+
if (rand(2) == 1)
|
49
|
+
ba.set_bit b
|
50
|
+
end
|
51
|
+
}
|
52
|
+
return ba
|
53
|
+
end
|
54
|
+
|
55
|
+
def random_update(size)
|
56
|
+
# The update array is an array of [op, n1, n2] elements. op is the symbol
|
57
|
+
# specifying which operation to use. n1 and n2 are indices of our neighbors
|
58
|
+
# in the state array.
|
59
|
+
update = Array.new(size)
|
60
|
+
0.upto(size - 1) {|u|
|
61
|
+
update[u] = [[:and, :or, :xor][rand(3)], rand(size), rand(size)]
|
62
|
+
}
|
63
|
+
return update
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
if __FILE__ == $0
|
68
|
+
BoolNet.new.run
|
69
|
+
end
|
70
|
+
|
data/ext/bitarray.c
CHANGED
@@ -172,33 +172,150 @@ rb_bitarray_alloc(VALUE klass)
|
|
172
172
|
}
|
173
173
|
|
174
174
|
|
175
|
-
/*
|
176
|
-
* BitArray.new(size)
|
177
|
-
*
|
178
|
-
* Return a new BitArray or the specified size.
|
179
|
-
*/
|
175
|
+
/* Create a new BitArray from a string. Called by rb_bitarray_initialize. */
|
180
176
|
static VALUE
|
181
|
-
|
177
|
+
rb_bitarray_from_string(VALUE self, VALUE arg)
|
182
178
|
{
|
183
179
|
struct bit_array *ba;
|
184
180
|
Data_Get_Struct(self, struct bit_array, ba);
|
185
181
|
|
186
|
-
|
187
|
-
|
182
|
+
/* Extract a C-string from arg. */
|
183
|
+
long str_len = RSTRING_LEN(arg) + 1;
|
184
|
+
char cstr[str_len];
|
185
|
+
strncpy(cstr, StringValueCStr(arg), str_len);
|
186
|
+
|
187
|
+
/* If the string doesn't begin with a '1' or '0', return an empty
|
188
|
+
* BitArray.
|
189
|
+
*/
|
190
|
+
if (cstr[0] != '0' && cstr[0] != '1') {
|
188
191
|
ba->bits = 0;
|
189
192
|
ba->array_size = 0;
|
190
193
|
return self;
|
191
194
|
}
|
192
|
-
long array_size = ((bits - 1) / UINT_BITS) + 1;
|
193
195
|
|
194
|
-
|
195
|
-
|
196
|
-
|
196
|
+
/* Otherwise, loop through the string and truncate it at the first invalid
|
197
|
+
* character.
|
198
|
+
*/
|
199
|
+
long i;
|
200
|
+
for (i = 0; i < str_len; i++) {
|
201
|
+
if (cstr[i] != '0' && cstr[i] != '1') {
|
202
|
+
cstr[i] = '\0';
|
203
|
+
break;
|
204
|
+
}
|
205
|
+
}
|
206
|
+
|
207
|
+
/* Setup the BitArray structure. */
|
208
|
+
ba->bits = strlen(cstr);
|
209
|
+
ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
|
210
|
+
ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
|
211
|
+
|
212
|
+
/* Initialize the bit array with the string. */
|
213
|
+
for (i = 0; i < ba->bits; i++) {
|
214
|
+
if (cstr[i] == '0') {
|
215
|
+
clear_bit(ba, i);
|
216
|
+
} else {
|
217
|
+
set_bit(ba, i);
|
218
|
+
}
|
219
|
+
}
|
220
|
+
|
221
|
+
return self;
|
222
|
+
}
|
223
|
+
|
224
|
+
|
225
|
+
/* Create a new BitArray from an Array. Called by rb_bitarray_initialize */
|
226
|
+
static VALUE
|
227
|
+
rb_bitarray_from_array(VALUE self, VALUE arg)
|
228
|
+
{
|
229
|
+
struct bit_array *ba;
|
230
|
+
Data_Get_Struct(self, struct bit_array, ba);
|
231
|
+
|
232
|
+
ba->bits = RARRAY_LEN(arg);
|
233
|
+
ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
|
234
|
+
ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
|
235
|
+
|
236
|
+
VALUE e;
|
237
|
+
long i;
|
238
|
+
for (i = 0; i < ba->bits; i++) {
|
239
|
+
e = rb_ary_entry(arg, i);
|
240
|
+
|
241
|
+
switch (TYPE(e)) {
|
242
|
+
case T_FIXNUM: /* fixnums and bignums treated the same. */
|
243
|
+
case T_BIGNUM:
|
244
|
+
NUM2LONG(e) == 0l ? clear_bit(ba, i) : set_bit(ba, i);
|
245
|
+
break;
|
246
|
+
case T_FALSE: /* false and nil treated the same. */
|
247
|
+
case T_NIL:
|
248
|
+
clear_bit(ba, i);
|
249
|
+
break;
|
250
|
+
default:
|
251
|
+
set_bit(ba, i);
|
252
|
+
}
|
253
|
+
}
|
197
254
|
|
198
255
|
return self;
|
199
256
|
}
|
200
257
|
|
201
258
|
|
259
|
+
/* call-seq:
|
260
|
+
* BitArray.new(size)
|
261
|
+
* BitArray.new(string)
|
262
|
+
* BitArray.new(array)
|
263
|
+
*
|
264
|
+
* When called with a size, creates a new BitArray of the specified size, with
|
265
|
+
* all bits cleared. When called with a string or an array, creates a new
|
266
|
+
* BitArray from the argument.
|
267
|
+
*
|
268
|
+
* If a string is given, it should consist of ones and zeroes. If there are
|
269
|
+
* any other characters in the string, the first invalid character and all
|
270
|
+
* following characters will be ignored.
|
271
|
+
*
|
272
|
+
* b = BitArray.new("10101010") => 10101010
|
273
|
+
* b = BitArray.new("1010abcd") => 1010
|
274
|
+
* b = BitArray.new("abcd") =>
|
275
|
+
*
|
276
|
+
* If an array is given, the BitArray is initialized from its elements using
|
277
|
+
* the following rules:
|
278
|
+
*
|
279
|
+
* 1. 0, false, or nil => 0
|
280
|
+
* 2. anything else => 1
|
281
|
+
*
|
282
|
+
* Note that the 0 is a number, not a string. "Anything else" means strings,
|
283
|
+
* symbols, non-zero numbers, subarrays, etc.
|
284
|
+
*
|
285
|
+
* b = BitArray.new([0,0,0,1,1,0]) => 000110
|
286
|
+
* b = BitArray.new([false, true, false]) => 010
|
287
|
+
* b = BitArray.new([:a, :b, :c, [:d, :e]]) => 1111
|
288
|
+
*/
|
289
|
+
static VALUE
|
290
|
+
rb_bitarray_initialize(VALUE self, VALUE arg)
|
291
|
+
{
|
292
|
+
if (TYPE(arg) == T_FIXNUM || TYPE(arg) == T_BIGNUM) {
|
293
|
+
struct bit_array *ba;
|
294
|
+
Data_Get_Struct(self, struct bit_array, ba);
|
295
|
+
|
296
|
+
long bits = NUM2LONG(arg);
|
297
|
+
if (bits <= 0) {
|
298
|
+
ba->bits = 0;
|
299
|
+
ba->array_size = 0;
|
300
|
+
return self;
|
301
|
+
}
|
302
|
+
|
303
|
+
ba->bits = bits;
|
304
|
+
ba->array_size = ((bits - 1) / UINT_BITS) + 1;
|
305
|
+
ba->array = ruby_xcalloc(ba->array_size, UINT_BYTES);
|
306
|
+
|
307
|
+
return self;
|
308
|
+
|
309
|
+
} else if (TYPE(arg) == T_STRING) {
|
310
|
+
return rb_bitarray_from_string(self, arg);
|
311
|
+
} else if (TYPE(arg) == T_ARRAY) {
|
312
|
+
return rb_bitarray_from_array(self, arg);
|
313
|
+
} else {
|
314
|
+
rb_raise(rb_eArgError, "must be size, string, or array");
|
315
|
+
}
|
316
|
+
}
|
317
|
+
|
318
|
+
|
202
319
|
/* call-seq:
|
203
320
|
* bitarray.clone -> a_bitarray
|
204
321
|
* bitarray.dup -> a_bitarray
|
data/test/bm.rb
CHANGED
@@ -6,6 +6,10 @@ Benchmark.bm(28) { |bm|
|
|
6
6
|
puts "---------------------------- Object instantiation (10,000 iterations)"
|
7
7
|
bm.report("BitField initialize") { 10000.times { BitField.new(256) } }
|
8
8
|
bm.report("BitArray initialize") { 10000.times { BitArray.new(256) } }
|
9
|
+
s = "0"*256
|
10
|
+
bm.report("BitArray init from string") { 10000.times { BitArray.new(s) } }
|
11
|
+
a = [0]*256
|
12
|
+
bm.report("BitArray init from array") { 10000.times { BitArray.new(a) } }
|
9
13
|
|
10
14
|
bf = BitField.new(256)
|
11
15
|
ba = BitArray.new(256)
|
data/test/test.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# BitArray Unit Tests.
|
2
|
+
# Originally modified from Peter Cooper's BitField test file.
|
2
3
|
# http://snippets.dzone.com/posts/show/4234
|
3
4
|
require "test/unit"
|
4
5
|
require "bitarray"
|
@@ -140,5 +141,24 @@ class TestLibraryFileName < Test::Unit::TestCase
|
|
140
141
|
ba[5] = 1
|
141
142
|
assert_equal [0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0], ba.to_a
|
142
143
|
end
|
144
|
+
|
145
|
+
def test_init_from_str
|
146
|
+
ba = BitArray.new("00011")
|
147
|
+
assert_equal "00011", ba.to_s
|
148
|
+
ba = BitArray.new("00011abcd")
|
149
|
+
assert_equal "00011", ba.to_s
|
150
|
+
ba = BitArray.new("abcd0101")
|
151
|
+
assert_equal "", ba.to_s
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
def test_init_from_array
|
156
|
+
ba = BitArray.new([0,1,1,1,0])
|
157
|
+
assert_equal "01110", ba.to_s
|
158
|
+
ba = BitArray.new([true, true, false, false, true])
|
159
|
+
assert_equal "11001", ba.to_s
|
160
|
+
ba = BitArray.new([nil, nil, :a, nil, [:b, :c]])
|
161
|
+
assert_equal "00101", ba.to_s
|
162
|
+
end
|
143
163
|
end
|
144
164
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ingramj-bitarray
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James E. Ingram
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-01 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
@@ -30,6 +30,8 @@ files:
|
|
30
30
|
- TODO
|
31
31
|
- VERSION
|
32
32
|
- bitarray.gemspec
|
33
|
+
- examples/bloomfilter.rb
|
34
|
+
- examples/boolnet.rb
|
33
35
|
- ext/bitarray.c
|
34
36
|
- ext/extconf.rb
|
35
37
|
- test/bitfield.rb
|
@@ -69,3 +71,5 @@ test_files:
|
|
69
71
|
- test/bitfield.rb
|
70
72
|
- test/test.rb
|
71
73
|
- test/bm.rb
|
74
|
+
- examples/bloomfilter.rb
|
75
|
+
- examples/boolnet.rb
|