ingramj-bitarray 0.4.2 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -9,11 +9,17 @@ Example usage:
9
9
  bm[5] = 1
10
10
  bm.toggle_all_bits
11
11
  puts bm
12
+
13
+ bm = BitArray.new("10010110")
14
+ bm.toggle_bit 3
15
+ bm.clear_bit 0
12
16
 
13
17
 
14
18
  The test/ directory has a unit test file. It also has a benchmark utility for
15
19
  comparison with Peter Cooper's pure Ruby BitField class.
16
20
 
21
+ The examples/ directory has a bloom filter dictionary-lookup demonstration.
22
+
17
23
  This library has been compiled and tested on:
18
24
 
19
25
  ruby 1.9.1p129 (2009-05-12 revision 23412) [i686-linux] (Ubuntu from src)
data/TODO CHANGED
@@ -1,7 +1,7 @@
1
1
  * Comment code. I'd like for it to be useful as a tutorial for extension
2
2
  writing, especially with regards to implementing new types.
3
- * Implement some more methods, like in-place enumerator methods (map!,
4
- reverse!, etc).
3
+ * BitArray intersection and union ( & and | )
4
+ * In-place enumerator methods (map!, reverse!, etc.)
5
5
  * Maybe allow resizing.
6
6
  * Write more tests
7
7
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.5.0
data/bitarray.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{bitarray}
5
- s.version = "0.4.2"
5
+ s.version = "0.5.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["James E. Ingram"]
9
- s.date = %q{2009-05-30}
9
+ s.date = %q{2009-06-01}
10
10
  s.description = %q{A bit array class for Ruby, implemented as a C extension. Includes methods for setting and clearing individual bits, and all bits at once. Also has the standard array access methods, [] and []=, and it mixes in Enumerable.}
11
11
  s.email = %q{ingramj@gmail.com}
12
12
  s.extensions = ["ext/extconf.rb"]
@@ -22,6 +22,8 @@ Gem::Specification.new do |s|
22
22
  "TODO",
23
23
  "VERSION",
24
24
  "bitarray.gemspec",
25
+ "examples/bloomfilter.rb",
26
+ "examples/boolnet.rb",
25
27
  "ext/bitarray.c",
26
28
  "ext/extconf.rb",
27
29
  "test/bitfield.rb",
@@ -38,7 +40,9 @@ Gem::Specification.new do |s|
38
40
  s.test_files = [
39
41
  "test/bitfield.rb",
40
42
  "test/test.rb",
41
- "test/bm.rb"
43
+ "test/bm.rb",
44
+ "examples/bloomfilter.rb",
45
+ "examples/boolnet.rb"
42
46
  ]
43
47
 
44
48
  if s.respond_to? :specification_version then
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+ require 'digest/sha1'
3
+ require 'bitarray'
4
+
5
+ # This bloom filter was written as a demonstration of the BitArray class.
6
+ # Therefore, it was written with an eye towards simplicity rather than
7
+ # efficiency or optimization.
8
+ #
9
+ # For information about picking parameters for a Bloom Filter, take a look at
10
+ # the Wikipedia page.
11
+ #
12
+ # http://en.wikipedia.org/wiki/Bloom_filter
13
+ #
14
+ class BloomFilter
15
+ def initialize(m = 1000000, k = 3)
16
+ @size = m
17
+ @hashes = k < 3 ? 3 : k
18
+ @ba = BitArray.new(@size)
19
+ end
20
+
21
+ def add(input)
22
+ hash(input).each {|i|
23
+ @ba.set_bit i
24
+ }
25
+ end
26
+
27
+ def include?(input)
28
+ hash(input).each {|i|
29
+ return false if @ba[i] == 0
30
+ }
31
+ return true
32
+ end
33
+
34
+ private
35
+ # Return an array of @hashes indices to set.
36
+ #
37
+ # We generate as many hash values as needed by using the technique described
38
+ # by Kirsch and Mitzenmacher[1].
39
+ #
40
+ # [1] http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
41
+ def hash(input)
42
+ h1 = input.hash.abs % @size
43
+ h2 = Digest::SHA1.hexdigest(input).to_i(16) % @size
44
+
45
+ ha = [h1, h2]
46
+ 1.upto(@hashes - 2) do |i|
47
+ ha << (h1 + i * h2) % @size
48
+ end
49
+ return ha
50
+ end
51
+ end
52
+
53
+
54
+ # As a demonstration, load the contents of the dictionary file and let the user
55
+ # look up words.
56
+ #
57
+ # Using the calculator at http://hur.st/bloomfilter, the optimum number of bits
58
+ # for my system's dictionary file (98569 words), with a false positive rate of
59
+ # 0.001, is 1,417,185 bits (about 173 KB), and 10 hash functions.
60
+ #
61
+ # Loading the dictionary takes about 4 seconds on my system.
62
+ if __FILE__ == $0
63
+ print "Loading dictionary..."
64
+ bf = BloomFilter.new(1417185, 10)
65
+ File.open('/usr/share/dict/words') {|f|
66
+ f.each_line {|w| bf.add(w.chomp)}
67
+ }
68
+ print "done\n"
69
+
70
+ puts "Enter words to look up, ctrl-d to quit."
71
+ done = false
72
+ while (!done)
73
+ print "Word: "
74
+ s = gets
75
+ if s
76
+ puts "In dictionary: #{bf.include? s.chomp}"
77
+ else
78
+ done = true
79
+ end
80
+ end
81
+ puts
82
+ end
83
+
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bitarray'
4
+
5
+ # A Random Boolean Network. Each bit of the network has two "neighbor" bits and
6
+ # an operation, all chosen at random. At each step, every bit is set to a new
7
+ # value by applying its operation to its neighbors.
8
+ #
9
+ # All such networks eventually fall into a cyclic or fixed-point attractor.
10
+ #
11
+ # See http://en.wikipedia.org/wiki/Boolean_network for more information.
12
+ class BoolNet
13
+ attr_reader :state, :size
14
+ def initialize(size = 80)
15
+ @size = size
16
+ # We use two arrays; one for the current state, and one holding information
17
+ # used to update the state.
18
+ @state = random_network(size)
19
+ @update = random_update(size)
20
+ end
21
+
22
+ def step
23
+ old_state = @state.clone
24
+ @update.each_with_index { |u,i|
25
+ case u[0]
26
+ when :and
27
+ @state[i] = old_state[u[1]] & old_state[u[2]]
28
+ when :or
29
+ @state[i] = old_state[u[1]] | old_state[u[2]]
30
+ when :xor
31
+ @state[i] = old_state[u[1]] ^ old_state[u[2]]
32
+ end
33
+ }
34
+ return @state
35
+ end
36
+
37
+ def run(steps = 23)
38
+ puts state
39
+ steps.times {
40
+ puts step
41
+ }
42
+ end
43
+
44
+ private
45
+ def random_network(size)
46
+ ba = BitArray.new(size)
47
+ 0.upto(size - 1) {|b|
48
+ if (rand(2) == 1)
49
+ ba.set_bit b
50
+ end
51
+ }
52
+ return ba
53
+ end
54
+
55
+ def random_update(size)
56
+ # The update array is an array of [op, n1, n2] elements. op is the symbol
57
+ # specifying which operation to use. n1 and n2 are indices of our neighbors
58
+ # in the state array.
59
+ update = Array.new(size)
60
+ 0.upto(size - 1) {|u|
61
+ update[u] = [[:and, :or, :xor][rand(3)], rand(size), rand(size)]
62
+ }
63
+ return update
64
+ end
65
+ end
66
+
67
+ if __FILE__ == $0
68
+ BoolNet.new.run
69
+ end
70
+
data/ext/bitarray.c CHANGED
@@ -172,33 +172,150 @@ rb_bitarray_alloc(VALUE klass)
172
172
  }
173
173
 
174
174
 
175
- /* call-seq:
176
- * BitArray.new(size)
177
- *
178
- * Return a new BitArray or the specified size.
179
- */
175
+ /* Create a new BitArray from a string. Called by rb_bitarray_initialize. */
180
176
  static VALUE
181
- rb_bitarray_initialize(VALUE self, VALUE size)
177
+ rb_bitarray_from_string(VALUE self, VALUE arg)
182
178
  {
183
179
  struct bit_array *ba;
184
180
  Data_Get_Struct(self, struct bit_array, ba);
185
181
 
186
- long bits = NUM2LONG(size);
187
- if (bits <= 0) {
182
+ /* Extract a C-string from arg. */
183
+ long str_len = RSTRING_LEN(arg) + 1;
184
+ char cstr[str_len];
185
+ strncpy(cstr, StringValueCStr(arg), str_len);
186
+
187
+ /* If the string doesn't begin with a '1' or '0', return an empty
188
+ * BitArray.
189
+ */
190
+ if (cstr[0] != '0' && cstr[0] != '1') {
188
191
  ba->bits = 0;
189
192
  ba->array_size = 0;
190
193
  return self;
191
194
  }
192
- long array_size = ((bits - 1) / UINT_BITS) + 1;
193
195
 
194
- ba->bits = bits;
195
- ba->array_size = array_size;
196
- ba->array = ruby_xcalloc(array_size, UINT_BYTES);
196
+ /* Otherwise, loop through the string and truncate it at the first invalid
197
+ * character.
198
+ */
199
+ long i;
200
+ for (i = 0; i < str_len; i++) {
201
+ if (cstr[i] != '0' && cstr[i] != '1') {
202
+ cstr[i] = '\0';
203
+ break;
204
+ }
205
+ }
206
+
207
+ /* Setup the BitArray structure. */
208
+ ba->bits = strlen(cstr);
209
+ ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
210
+ ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
211
+
212
+ /* Initialize the bit array with the string. */
213
+ for (i = 0; i < ba->bits; i++) {
214
+ if (cstr[i] == '0') {
215
+ clear_bit(ba, i);
216
+ } else {
217
+ set_bit(ba, i);
218
+ }
219
+ }
220
+
221
+ return self;
222
+ }
223
+
224
+
225
+ /* Create a new BitArray from an Array. Called by rb_bitarray_initialize */
226
+ static VALUE
227
+ rb_bitarray_from_array(VALUE self, VALUE arg)
228
+ {
229
+ struct bit_array *ba;
230
+ Data_Get_Struct(self, struct bit_array, ba);
231
+
232
+ ba->bits = RARRAY_LEN(arg);
233
+ ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
234
+ ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
235
+
236
+ VALUE e;
237
+ long i;
238
+ for (i = 0; i < ba->bits; i++) {
239
+ e = rb_ary_entry(arg, i);
240
+
241
+ switch (TYPE(e)) {
242
+ case T_FIXNUM: /* fixnums and bignums treated the same. */
243
+ case T_BIGNUM:
244
+ NUM2LONG(e) == 0l ? clear_bit(ba, i) : set_bit(ba, i);
245
+ break;
246
+ case T_FALSE: /* false and nil treated the same. */
247
+ case T_NIL:
248
+ clear_bit(ba, i);
249
+ break;
250
+ default:
251
+ set_bit(ba, i);
252
+ }
253
+ }
197
254
 
198
255
  return self;
199
256
  }
200
257
 
201
258
 
259
+ /* call-seq:
260
+ * BitArray.new(size)
261
+ * BitArray.new(string)
262
+ * BitArray.new(array)
263
+ *
264
+ * When called with a size, creates a new BitArray of the specified size, with
265
+ * all bits cleared. When called with a string or an array, creates a new
266
+ * BitArray from the argument.
267
+ *
268
+ * If a string is given, it should consist of ones and zeroes. If there are
269
+ * any other characters in the string, the first invalid character and all
270
+ * following characters will be ignored.
271
+ *
272
+ * b = BitArray.new("10101010") => 10101010
273
+ * b = BitArray.new("1010abcd") => 1010
274
+ * b = BitArray.new("abcd") =>
275
+ *
276
+ * If an array is given, the BitArray is initialized from its elements using
277
+ * the following rules:
278
+ *
279
+ * 1. 0, false, or nil => 0
280
+ * 2. anything else => 1
281
+ *
282
+ * Note that the 0 is a number, not a string. "Anything else" means strings,
283
+ * symbols, non-zero numbers, subarrays, etc.
284
+ *
285
+ * b = BitArray.new([0,0,0,1,1,0]) => 000110
286
+ * b = BitArray.new([false, true, false]) => 010
287
+ * b = BitArray.new([:a, :b, :c, [:d, :e]]) => 1111
288
+ */
289
+ static VALUE
290
+ rb_bitarray_initialize(VALUE self, VALUE arg)
291
+ {
292
+ if (TYPE(arg) == T_FIXNUM || TYPE(arg) == T_BIGNUM) {
293
+ struct bit_array *ba;
294
+ Data_Get_Struct(self, struct bit_array, ba);
295
+
296
+ long bits = NUM2LONG(arg);
297
+ if (bits <= 0) {
298
+ ba->bits = 0;
299
+ ba->array_size = 0;
300
+ return self;
301
+ }
302
+
303
+ ba->bits = bits;
304
+ ba->array_size = ((bits - 1) / UINT_BITS) + 1;
305
+ ba->array = ruby_xcalloc(ba->array_size, UINT_BYTES);
306
+
307
+ return self;
308
+
309
+ } else if (TYPE(arg) == T_STRING) {
310
+ return rb_bitarray_from_string(self, arg);
311
+ } else if (TYPE(arg) == T_ARRAY) {
312
+ return rb_bitarray_from_array(self, arg);
313
+ } else {
314
+ rb_raise(rb_eArgError, "must be size, string, or array");
315
+ }
316
+ }
317
+
318
+
202
319
  /* call-seq:
203
320
  * bitarray.clone -> a_bitarray
204
321
  * bitarray.dup -> a_bitarray
data/test/bm.rb CHANGED
@@ -6,6 +6,10 @@ Benchmark.bm(28) { |bm|
6
6
  puts "---------------------------- Object instantiation (10,000 iterations)"
7
7
  bm.report("BitField initialize") { 10000.times { BitField.new(256) } }
8
8
  bm.report("BitArray initialize") { 10000.times { BitArray.new(256) } }
9
+ s = "0"*256
10
+ bm.report("BitArray init from string") { 10000.times { BitArray.new(s) } }
11
+ a = [0]*256
12
+ bm.report("BitArray init from array") { 10000.times { BitArray.new(a) } }
9
13
 
10
14
  bf = BitField.new(256)
11
15
  ba = BitArray.new(256)
data/test/test.rb CHANGED
@@ -1,4 +1,5 @@
1
- # Peter Cooper's BitField test file, modified for BitArray.
1
+ # BitArray Unit Tests.
2
+ # Originally modified from Peter Cooper's BitField test file.
2
3
  # http://snippets.dzone.com/posts/show/4234
3
4
  require "test/unit"
4
5
  require "bitarray"
@@ -140,5 +141,24 @@ class TestLibraryFileName < Test::Unit::TestCase
140
141
  ba[5] = 1
141
142
  assert_equal [0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0], ba.to_a
142
143
  end
144
+
145
+ def test_init_from_str
146
+ ba = BitArray.new("00011")
147
+ assert_equal "00011", ba.to_s
148
+ ba = BitArray.new("00011abcd")
149
+ assert_equal "00011", ba.to_s
150
+ ba = BitArray.new("abcd0101")
151
+ assert_equal "", ba.to_s
152
+ end
153
+
154
+
155
+ def test_init_from_array
156
+ ba = BitArray.new([0,1,1,1,0])
157
+ assert_equal "01110", ba.to_s
158
+ ba = BitArray.new([true, true, false, false, true])
159
+ assert_equal "11001", ba.to_s
160
+ ba = BitArray.new([nil, nil, :a, nil, [:b, :c]])
161
+ assert_equal "00101", ba.to_s
162
+ end
143
163
  end
144
164
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingramj-bitarray
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James E. Ingram
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-05-30 00:00:00 -07:00
12
+ date: 2009-06-01 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -30,6 +30,8 @@ files:
30
30
  - TODO
31
31
  - VERSION
32
32
  - bitarray.gemspec
33
+ - examples/bloomfilter.rb
34
+ - examples/boolnet.rb
33
35
  - ext/bitarray.c
34
36
  - ext/extconf.rb
35
37
  - test/bitfield.rb
@@ -69,3 +71,5 @@ test_files:
69
71
  - test/bitfield.rb
70
72
  - test/test.rb
71
73
  - test/bm.rb
74
+ - examples/bloomfilter.rb
75
+ - examples/boolnet.rb