ingramj-bitarray 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -9,11 +9,17 @@ Example usage:
9
9
  bm[5] = 1
10
10
  bm.toggle_all_bits
11
11
  puts bm
12
+
13
+ bm = BitArray.new("10010110")
14
+ bm.toggle_bit 3
15
+ bm.clear_bit 0
12
16
 
13
17
 
14
18
  The test/ directory has a unit test file. It also has a benchmark utility for
15
19
  comparison with Peter Cooper's pure Ruby BitField class.
16
20
 
21
+ The examples/ directory has a bloom filter dictionary-lookup demonstration.
22
+
17
23
  This library has been compiled and tested on:
18
24
 
19
25
  ruby 1.9.1p129 (2009-05-12 revision 23412) [i686-linux] (Ubuntu from src)
data/TODO CHANGED
@@ -1,7 +1,7 @@
1
1
  * Comment code. I'd like for it to be useful as a tutorial for extension
2
2
  writing, especially with regards to implementing new types.
3
- * Implement some more methods, like in-place enumerator methods (map!,
4
- reverse!, etc).
3
+ * BitArray intersection and union ( & and | )
4
+ * In-place enumerator methods (map!, reverse!, etc.)
5
5
  * Maybe allow resizing.
6
6
  * Write more tests
7
7
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.2
1
+ 0.5.0
data/bitarray.gemspec CHANGED
@@ -2,11 +2,11 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = %q{bitarray}
5
- s.version = "0.4.2"
5
+ s.version = "0.5.0"
6
6
 
7
7
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
8
  s.authors = ["James E. Ingram"]
9
- s.date = %q{2009-05-30}
9
+ s.date = %q{2009-06-01}
10
10
  s.description = %q{A bit array class for Ruby, implemented as a C extension. Includes methods for setting and clearing individual bits, and all bits at once. Also has the standard array access methods, [] and []=, and it mixes in Enumerable.}
11
11
  s.email = %q{ingramj@gmail.com}
12
12
  s.extensions = ["ext/extconf.rb"]
@@ -22,6 +22,8 @@ Gem::Specification.new do |s|
22
22
  "TODO",
23
23
  "VERSION",
24
24
  "bitarray.gemspec",
25
+ "examples/bloomfilter.rb",
26
+ "examples/boolnet.rb",
25
27
  "ext/bitarray.c",
26
28
  "ext/extconf.rb",
27
29
  "test/bitfield.rb",
@@ -38,7 +40,9 @@ Gem::Specification.new do |s|
38
40
  s.test_files = [
39
41
  "test/bitfield.rb",
40
42
  "test/test.rb",
41
- "test/bm.rb"
43
+ "test/bm.rb",
44
+ "examples/bloomfilter.rb",
45
+ "examples/boolnet.rb"
42
46
  ]
43
47
 
44
48
  if s.respond_to? :specification_version then
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env ruby
2
+ require 'digest/sha1'
3
+ require 'bitarray'
4
+
5
+ # This bloom filter was written as a demonstration of the BitArray class.
6
+ # Therefore, it was written with an eye towards simplicity rather than
7
+ # efficiency or optimization.
8
+ #
9
+ # For information about picking parameters for a Bloom Filter, take a look at
10
+ # the Wikipedia page.
11
+ #
12
+ # http://en.wikipedia.org/wiki/Bloom_filter
13
+ #
14
+ class BloomFilter
15
+ def initialize(m = 1000000, k = 3)
16
+ @size = m
17
+ @hashes = k < 3 ? 3 : k
18
+ @ba = BitArray.new(@size)
19
+ end
20
+
21
+ def add(input)
22
+ hash(input).each {|i|
23
+ @ba.set_bit i
24
+ }
25
+ end
26
+
27
+ def include?(input)
28
+ hash(input).each {|i|
29
+ return false if @ba[i] == 0
30
+ }
31
+ return true
32
+ end
33
+
34
+ private
35
+ # Return an array of @hashes indices to set.
36
+ #
37
+ # We generate as many hash values as needed by using the technique described
38
+ # by Kirsch and Mitzenmacher[1].
39
+ #
40
+ # [1] http://www.eecs.harvard.edu/~kirsch/pubs/bbbf/esa06.pdf
41
+ def hash(input)
42
+ h1 = input.hash.abs % @size
43
+ h2 = Digest::SHA1.hexdigest(input).to_i(16) % @size
44
+
45
+ ha = [h1, h2]
46
+ 1.upto(@hashes - 2) do |i|
47
+ ha << (h1 + i * h2) % @size
48
+ end
49
+ return ha
50
+ end
51
+ end
52
+
53
+
54
+ # As a demonstration, load the contents of the dictionary file and let the user
55
+ # look up words.
56
+ #
57
+ # Using the calculator at http://hur.st/bloomfilter, the optimum number of bits
58
+ # for my system's dictionary file (98569 words), with a false positive rate of
59
+ # 0.001, is 1,417,185 bits (about 173 KB), and 10 hash functions.
60
+ #
61
+ # Loading the dictionary takes about 4 seconds on my system.
62
+ if __FILE__ == $0
63
+ print "Loading dictionary..."
64
+ bf = BloomFilter.new(1417185, 10)
65
+ File.open('/usr/share/dict/words') {|f|
66
+ f.each_line {|w| bf.add(w.chomp)}
67
+ }
68
+ print "done\n"
69
+
70
+ puts "Enter words to look up, ctrl-d to quit."
71
+ done = false
72
+ while (!done)
73
+ print "Word: "
74
+ s = gets
75
+ if s
76
+ puts "In dictionary: #{bf.include? s.chomp}"
77
+ else
78
+ done = true
79
+ end
80
+ end
81
+ puts
82
+ end
83
+
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bitarray'
4
+
5
+ # A Random Boolean Network. Each bit of the network has two "neighbor" bits and
6
+ # an operation, all chosen at random. At each step, every bit is set to a new
7
+ # value by applying its operation to its neighbors.
8
+ #
9
+ # All such networks eventually fall into a cyclic or fixed-point attractor.
10
+ #
11
+ # See http://en.wikipedia.org/wiki/Boolean_network for more information.
12
+ class BoolNet
13
+ attr_reader :state, :size
14
+ def initialize(size = 80)
15
+ @size = size
16
+ # We use two arrays; one for the current state, and one holding information
17
+ # used to update the state.
18
+ @state = random_network(size)
19
+ @update = random_update(size)
20
+ end
21
+
22
+ def step
23
+ old_state = @state.clone
24
+ @update.each_with_index { |u,i|
25
+ case u[0]
26
+ when :and
27
+ @state[i] = old_state[u[1]] & old_state[u[2]]
28
+ when :or
29
+ @state[i] = old_state[u[1]] | old_state[u[2]]
30
+ when :xor
31
+ @state[i] = old_state[u[1]] ^ old_state[u[2]]
32
+ end
33
+ }
34
+ return @state
35
+ end
36
+
37
+ def run(steps = 23)
38
+ puts state
39
+ steps.times {
40
+ puts step
41
+ }
42
+ end
43
+
44
+ private
45
+ def random_network(size)
46
+ ba = BitArray.new(size)
47
+ 0.upto(size - 1) {|b|
48
+ if (rand(2) == 1)
49
+ ba.set_bit b
50
+ end
51
+ }
52
+ return ba
53
+ end
54
+
55
+ def random_update(size)
56
+ # The update array is an array of [op, n1, n2] elements. op is the symbol
57
+ # specifying which operation to use. n1 and n2 are indices of our neighbors
58
+ # in the state array.
59
+ update = Array.new(size)
60
+ 0.upto(size - 1) {|u|
61
+ update[u] = [[:and, :or, :xor][rand(3)], rand(size), rand(size)]
62
+ }
63
+ return update
64
+ end
65
+ end
66
+
67
+ if __FILE__ == $0
68
+ BoolNet.new.run
69
+ end
70
+
data/ext/bitarray.c CHANGED
@@ -172,33 +172,150 @@ rb_bitarray_alloc(VALUE klass)
172
172
  }
173
173
 
174
174
 
175
- /* call-seq:
176
- * BitArray.new(size)
177
- *
178
- * Return a new BitArray or the specified size.
179
- */
175
+ /* Create a new BitArray from a string. Called by rb_bitarray_initialize. */
180
176
  static VALUE
181
- rb_bitarray_initialize(VALUE self, VALUE size)
177
+ rb_bitarray_from_string(VALUE self, VALUE arg)
182
178
  {
183
179
  struct bit_array *ba;
184
180
  Data_Get_Struct(self, struct bit_array, ba);
185
181
 
186
- long bits = NUM2LONG(size);
187
- if (bits <= 0) {
182
+ /* Extract a C-string from arg. */
183
+ long str_len = RSTRING_LEN(arg) + 1;
184
+ char cstr[str_len];
185
+ strncpy(cstr, StringValueCStr(arg), str_len);
186
+
187
+ /* If the string doesn't begin with a '1' or '0', return an empty
188
+ * BitArray.
189
+ */
190
+ if (cstr[0] != '0' && cstr[0] != '1') {
188
191
  ba->bits = 0;
189
192
  ba->array_size = 0;
190
193
  return self;
191
194
  }
192
- long array_size = ((bits - 1) / UINT_BITS) + 1;
193
195
 
194
- ba->bits = bits;
195
- ba->array_size = array_size;
196
- ba->array = ruby_xcalloc(array_size, UINT_BYTES);
196
+ /* Otherwise, loop through the string and truncate it at the first invalid
197
+ * character.
198
+ */
199
+ long i;
200
+ for (i = 0; i < str_len; i++) {
201
+ if (cstr[i] != '0' && cstr[i] != '1') {
202
+ cstr[i] = '\0';
203
+ break;
204
+ }
205
+ }
206
+
207
+ /* Setup the BitArray structure. */
208
+ ba->bits = strlen(cstr);
209
+ ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
210
+ ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
211
+
212
+ /* Initialize the bit array with the string. */
213
+ for (i = 0; i < ba->bits; i++) {
214
+ if (cstr[i] == '0') {
215
+ clear_bit(ba, i);
216
+ } else {
217
+ set_bit(ba, i);
218
+ }
219
+ }
220
+
221
+ return self;
222
+ }
223
+
224
+
225
+ /* Create a new BitArray from an Array. Called by rb_bitarray_initialize */
226
+ static VALUE
227
+ rb_bitarray_from_array(VALUE self, VALUE arg)
228
+ {
229
+ struct bit_array *ba;
230
+ Data_Get_Struct(self, struct bit_array, ba);
231
+
232
+ ba->bits = RARRAY_LEN(arg);
233
+ ba->array_size = ((ba->bits - 1) / UINT_BITS) + 1;
234
+ ba->array = ruby_xmalloc(ba->array_size * UINT_BYTES);
235
+
236
+ VALUE e;
237
+ long i;
238
+ for (i = 0; i < ba->bits; i++) {
239
+ e = rb_ary_entry(arg, i);
240
+
241
+ switch (TYPE(e)) {
242
+ case T_FIXNUM: /* fixnums and bignums treated the same. */
243
+ case T_BIGNUM:
244
+ NUM2LONG(e) == 0l ? clear_bit(ba, i) : set_bit(ba, i);
245
+ break;
246
+ case T_FALSE: /* false and nil treated the same. */
247
+ case T_NIL:
248
+ clear_bit(ba, i);
249
+ break;
250
+ default:
251
+ set_bit(ba, i);
252
+ }
253
+ }
197
254
 
198
255
  return self;
199
256
  }
200
257
 
201
258
 
259
+ /* call-seq:
260
+ * BitArray.new(size)
261
+ * BitArray.new(string)
262
+ * BitArray.new(array)
263
+ *
264
+ * When called with a size, creates a new BitArray of the specified size, with
265
+ * all bits cleared. When called with a string or an array, creates a new
266
+ * BitArray from the argument.
267
+ *
268
+ * If a string is given, it should consist of ones and zeroes. If there are
269
+ * any other characters in the string, the first invalid character and all
270
+ * following characters will be ignored.
271
+ *
272
+ * b = BitArray.new("10101010") => 10101010
273
+ * b = BitArray.new("1010abcd") => 1010
274
+ * b = BitArray.new("abcd") =>
275
+ *
276
+ * If an array is given, the BitArray is initialized from its elements using
277
+ * the following rules:
278
+ *
279
+ * 1. 0, false, or nil => 0
280
+ * 2. anything else => 1
281
+ *
282
+ * Note that the 0 is a number, not a string. "Anything else" means strings,
283
+ * symbols, non-zero numbers, subarrays, etc.
284
+ *
285
+ * b = BitArray.new([0,0,0,1,1,0]) => 000110
286
+ * b = BitArray.new([false, true, false]) => 010
287
+ * b = BitArray.new([:a, :b, :c, [:d, :e]]) => 1111
288
+ */
289
+ static VALUE
290
+ rb_bitarray_initialize(VALUE self, VALUE arg)
291
+ {
292
+ if (TYPE(arg) == T_FIXNUM || TYPE(arg) == T_BIGNUM) {
293
+ struct bit_array *ba;
294
+ Data_Get_Struct(self, struct bit_array, ba);
295
+
296
+ long bits = NUM2LONG(arg);
297
+ if (bits <= 0) {
298
+ ba->bits = 0;
299
+ ba->array_size = 0;
300
+ return self;
301
+ }
302
+
303
+ ba->bits = bits;
304
+ ba->array_size = ((bits - 1) / UINT_BITS) + 1;
305
+ ba->array = ruby_xcalloc(ba->array_size, UINT_BYTES);
306
+
307
+ return self;
308
+
309
+ } else if (TYPE(arg) == T_STRING) {
310
+ return rb_bitarray_from_string(self, arg);
311
+ } else if (TYPE(arg) == T_ARRAY) {
312
+ return rb_bitarray_from_array(self, arg);
313
+ } else {
314
+ rb_raise(rb_eArgError, "must be size, string, or array");
315
+ }
316
+ }
317
+
318
+
202
319
  /* call-seq:
203
320
  * bitarray.clone -> a_bitarray
204
321
  * bitarray.dup -> a_bitarray
data/test/bm.rb CHANGED
@@ -6,6 +6,10 @@ Benchmark.bm(28) { |bm|
6
6
  puts "---------------------------- Object instantiation (10,000 iterations)"
7
7
  bm.report("BitField initialize") { 10000.times { BitField.new(256) } }
8
8
  bm.report("BitArray initialize") { 10000.times { BitArray.new(256) } }
9
+ s = "0"*256
10
+ bm.report("BitArray init from string") { 10000.times { BitArray.new(s) } }
11
+ a = [0]*256
12
+ bm.report("BitArray init from array") { 10000.times { BitArray.new(a) } }
9
13
 
10
14
  bf = BitField.new(256)
11
15
  ba = BitArray.new(256)
data/test/test.rb CHANGED
@@ -1,4 +1,5 @@
1
- # Peter Cooper's BitField test file, modified for BitArray.
1
+ # BitArray Unit Tests.
2
+ # Originally modified from Peter Cooper's BitField test file.
2
3
  # http://snippets.dzone.com/posts/show/4234
3
4
  require "test/unit"
4
5
  require "bitarray"
@@ -140,5 +141,24 @@ class TestLibraryFileName < Test::Unit::TestCase
140
141
  ba[5] = 1
141
142
  assert_equal [0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0], ba.to_a
142
143
  end
144
+
145
+ def test_init_from_str
146
+ ba = BitArray.new("00011")
147
+ assert_equal "00011", ba.to_s
148
+ ba = BitArray.new("00011abcd")
149
+ assert_equal "00011", ba.to_s
150
+ ba = BitArray.new("abcd0101")
151
+ assert_equal "", ba.to_s
152
+ end
153
+
154
+
155
+ def test_init_from_array
156
+ ba = BitArray.new([0,1,1,1,0])
157
+ assert_equal "01110", ba.to_s
158
+ ba = BitArray.new([true, true, false, false, true])
159
+ assert_equal "11001", ba.to_s
160
+ ba = BitArray.new([nil, nil, :a, nil, [:b, :c]])
161
+ assert_equal "00101", ba.to_s
162
+ end
143
163
  end
144
164
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ingramj-bitarray
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.2
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James E. Ingram
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-05-30 00:00:00 -07:00
12
+ date: 2009-06-01 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -30,6 +30,8 @@ files:
30
30
  - TODO
31
31
  - VERSION
32
32
  - bitarray.gemspec
33
+ - examples/bloomfilter.rb
34
+ - examples/boolnet.rb
33
35
  - ext/bitarray.c
34
36
  - ext/extconf.rb
35
37
  - test/bitfield.rb
@@ -69,3 +71,5 @@ test_files:
69
71
  - test/bitfield.rb
70
72
  - test/test.rb
71
73
  - test/bm.rb
74
+ - examples/bloomfilter.rb
75
+ - examples/boolnet.rb