bitwise 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1 @@
1
+ source "https://rubygems.org"
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Kenn Ejima
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ Bitwise
2
+ =======
3
+
4
+ Fast, memory efficient bitwise operations on large binary strings.
5
+
6
+ Internally a bit array is represented as a Ruby string with `Encoding::ASCII_8BIT` encoding, which keeps billions of bits in a workable footprint.
7
+
8
+ * 1,000,000 bits = 125KB
9
+ * 10,000,000 bits = 1.25MB
10
+ * 100,000,000 bits = 12.5MB
11
+ * 1,000,000,000 bits = 125MB
12
+
13
+ Install
14
+ -------
15
+
16
+ gem install bitwise
17
+
18
+ Usage
19
+ -----
20
+
21
+ Bitwise assignment and retrieval:
22
+
23
+ ```ruby
24
+ b = Bitwise.new(1)
25
+
26
+ b.to_bits
27
+ => "00000000"
28
+
29
+ b.set_at(1)
30
+ b.set_at(4)
31
+
32
+ b.to_bits
33
+ => "01001000"
34
+
35
+ b.clear_at(1)
36
+
37
+ b.to_bits
38
+ => "00001000"
39
+ ```
40
+
41
+ Index assignment and retrieval:
42
+
43
+ ```ruby
44
+ b = Bitwise.new
45
+ b.indexes = [1, 10, 100]
46
+
47
+ b.to_bits
48
+ => "01000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000"
49
+
50
+ b.cardinality
51
+ => 3
52
+
53
+ b.size
54
+ => 13
55
+
56
+ b.set_at(20)
57
+
58
+ b.to_bits
59
+ => "01000000001000000000100000000000000000000000000000000000000000000000000000000000000000000000000000001000"
60
+
61
+ b.value.unpack('C*')
62
+ => [64, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8]
63
+
64
+ b.cardinality
65
+ => 4
66
+
67
+ b.indexes
68
+ => [1, 10, 20, 100]
69
+ ```
data/Rakefile ADDED
@@ -0,0 +1,29 @@
1
+ # require "bundler/gem_tasks"
2
+ # require 'rubygems'
3
+ # require 'bundler'
4
+
5
+ require "rake/extensiontask"
6
+ Rake::ExtensionTask.new("bitwise") do |extension|
7
+ extension.lib_dir = "lib/bitwise"
8
+ end
9
+
10
+ require 'rspec/core/rake_task'
11
+ task :default => :spec
12
+ RSpec::Core::RakeTask.new(:spec) do |t|
13
+ t.rspec_opts = ["--color"]
14
+ t.fail_on_error = false
15
+ end
16
+
17
+ require 'jeweler'
18
+ Jeweler::Tasks.new do |gem|
19
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
20
+ gem.name = "bitwise"
21
+ gem.homepage = "http://github.com/kenn/bitwise"
22
+ gem.license = "MIT"
23
+ gem.summary = %Q{Fast, memory efficient bitwise operations on large binary strings}
24
+ gem.description = %Q{Fast, memory efficient bitwise operations on large binary strings}
25
+ gem.email = "kenn.ejima@gmail.com"
26
+ gem.authors = ["Kenn Ejima"]
27
+ # dependencies defined in Gemfile
28
+ end
29
+ Jeweler::RubygemsDotOrgTasks.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bitwise.gemspec ADDED
@@ -0,0 +1,49 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "bitwise"
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Kenn Ejima"]
12
+ s.date = "2011-12-13"
13
+ s.description = "Fast, memory efficient bitwise operations on large binary strings"
14
+ s.email = "kenn.ejima@gmail.com"
15
+ s.extensions = ["ext/bitwise/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE.txt",
18
+ "README.md"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ "Gemfile",
23
+ "LICENSE.txt",
24
+ "README.md",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "bitwise.gemspec",
28
+ "ext/bitwise/bitwise.c",
29
+ "ext/bitwise/extconf.rb",
30
+ "lib/bitwise.rb",
31
+ "lib/bitwise/bitwise.bundle",
32
+ "spec/bitwise_spec.rb"
33
+ ]
34
+ s.homepage = "http://github.com/kenn/bitwise"
35
+ s.licenses = ["MIT"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = "1.8.10"
38
+ s.summary = "Fast, memory efficient bitwise operations on large binary strings"
39
+
40
+ if s.respond_to? :specification_version then
41
+ s.specification_version = 3
42
+
43
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
+ else
45
+ end
46
+ else
47
+ end
48
+ end
49
+
@@ -0,0 +1,61 @@
1
+ #include "ruby.h"
2
+
3
+ VALUE BitwiseClass;
4
+
5
+ static const unsigned char COUNT_TABLE[256] = { /* number of set bits in each byte value 0..255 */
6
+     0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
7
+     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
8
+     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
9
+     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
10
+     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
11
+     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
12
+     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
13
+     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
14
+     1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
15
+     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
16
+     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
17
+     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
18
+     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
19
+     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
20
+     3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
21
+     4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
22
+ };
23
+
24
+ /*
25
+  * Bitwise.string_union(max, min) -> String
26
+  * Returns a new string as long as max: each byte of max OR-ed with the byte of
27
+  * min at the same offset; bytes of max beyond the end of min are copied as-is.
28
+  */
29
+ static VALUE bw_string_union(VALUE self, VALUE max, VALUE min)
30
+ {
31
+     VALUE result;
32
+     char *dst;
33
+     const char *src;
34
+     long i, overlap; /* long, like RSTRING_LEN: an int would truncate huge strings */
35
+
36
+     /* Accept strings (or objects convertible via to_str); anything else raises TypeError. */
37
+     StringValue(max);
38
+     StringValue(min);
39
+
40
+     result = rb_str_new(RSTRING_PTR(max), RSTRING_LEN(max));
41
+     /* Only the bytes present in both strings need OR-ing. */
42
+     overlap = RSTRING_LEN(min) < RSTRING_LEN(max) ? RSTRING_LEN(min) : RSTRING_LEN(max);
43
+     dst = RSTRING_PTR(result);
44
+     src = RSTRING_PTR(min);
45
+     for (i = 0; i < overlap; i++) {
46
+         dst[i] |= src[i];
47
+     }
48
+     return result;
49
+ }
34
+
35
+ /*
36
+  * Bitwise.string_intersect(max, min) -> String
37
+  * Returns a new string as long as the shorter argument, each byte being the
38
+  * AND of the bytes found at the same offset in both arguments.
39
+  */
40
+ static VALUE bw_string_intersect(VALUE self, VALUE max, VALUE min)
41
+ {
42
+     VALUE result;
43
+     char *dst;
44
+     const char *src;
45
+     long i, len; /* long, like RSTRING_LEN: an int would truncate huge strings */
46
+
47
+     /* Accept strings (or objects convertible via to_str); anything else raises TypeError. */
48
+     StringValue(max);
49
+     StringValue(min);
50
+
51
+     /* The longer string is expected first; swap when it is not, so the loop
52
+      * below can never read past the end of max. */
53
+     if (RSTRING_LEN(max) < RSTRING_LEN(min)) {
54
+         VALUE tmp = max;
55
+         max = min;
56
+         min = tmp;
57
+     }
58
+
59
+     len = RSTRING_LEN(min);
60
+     result = rb_str_new(RSTRING_PTR(min), len);
61
+     dst = RSTRING_PTR(result);
62
+     src = RSTRING_PTR(max);
63
+     for (i = 0; i < len; i++) {
64
+         dst[i] &= src[i];
65
+     }
66
+     return result;
67
+ }
44
+
45
+ /*
46
+  * Bitwise.population_count(str) -> Integer
47
+  * Returns the total number of set bits in str, summed byte by byte through
48
+  * COUNT_TABLE.
49
+  */
50
+ static VALUE bw_population_count(VALUE self, VALUE str)
51
+ {
52
+     const unsigned char *buffer;
53
+     long i, len;
54
+     long count = 0; /* long: the total can exceed INT_MAX for very large strings */
55
+
56
+     /* Accept a string (or an object convertible via to_str); anything else raises TypeError. */
57
+     StringValue(str);
58
+
59
+     /* Bytes must be read as unsigned so values above 127 index the table correctly. */
60
+     buffer = (const unsigned char *)RSTRING_PTR(str);
61
+     len = RSTRING_LEN(str);
62
+     for (i = 0; i < len; i++) {
63
+         count += COUNT_TABLE[buffer[i]];
64
+     }
65
+     return LONG2NUM(count);
66
+ }
54
+
55
+ /*
56
+  * Extension entry point. Defines the Bitwise class and registers its three
57
+  * singleton methods: string_union, string_intersect and population_count.
58
+  */
59
+ void Init_bitwise(void)
60
+ {
61
+     BitwiseClass = rb_define_class("Bitwise", rb_cObject);
62
+     rb_define_singleton_method(BitwiseClass, "string_union", bw_string_union, 2);
63
+     rb_define_singleton_method(BitwiseClass, "string_intersect", bw_string_intersect, 2);
64
+     rb_define_singleton_method(BitwiseClass, "population_count", bw_population_count, 1);
65
+ }
@@ -0,0 +1,2 @@
1
+ require 'mkmf'
2
+ # Target "bitwise/bitwise" so the compiled library is installed at the path
3
+ # lib/bitwise.rb loads it from (require "bitwise/bitwise").
4
+ create_makefile("bitwise/bitwise")
data/lib/bitwise.rb ADDED
@@ -0,0 +1,105 @@
1
+ # coding: ascii-8bit
2
+
3
+ require "bitwise/bitwise"
4
+
5
+ class Bitwise
6
+ attr_accessor :value
7
+
8
+ def initialize(size = 0)
9
+ @value = "\x00" * size
10
+ end
11
+
12
+ def size
13
+ @value.bytesize
14
+ end
15
+ alias :to_s :size
16
+
17
+ def to_bits
18
+ @value.unpack('B*').first
19
+ end
20
+
21
+ def set_at(index)
22
+ get_byte(index)
23
+ @value.setbyte(@div, @byte | bitmask)
24
+ end
25
+
26
+ def clear_at(index)
27
+ get_byte(index)
28
+ @value.setbyte(@div, @byte & ~bitmask)
29
+ end
30
+
31
+ def set_at?(index)
32
+ get_bit(index) == 1
33
+ end
34
+
35
+ def clear_at?(index)
36
+ get_bit(index) == 0
37
+ end
38
+
39
+ def get_bit(index)
40
+ get_byte(index)
41
+ (@byte & bitmask) > 0 ? 1 : 0
42
+ end
43
+
44
+ def bitmask
45
+ 2**(7 - @mod)
46
+ end
47
+
48
+ def get_byte(index)
49
+ @div, @mod = index.divmod(8)
50
+ @byte = @value.getbyte(@div)
51
+ end
52
+
53
+ def intersect(other)
54
+ min, max = [ self.value, other.value ].sort_by{|i| i.bytesize }
55
+ result = Bitwise.new
56
+ result.value = Bitwise.string_intersect(max, min)
57
+ result
58
+ end
59
+ alias :& :intersect
60
+
61
+ def union(other)
62
+ min, max = [ self.value, other.value ].sort_by{|i| i.bytesize }
63
+ result = Bitwise.new
64
+ result.value = Bitwise.string_union(max, min)
65
+ result
66
+ end
67
+ alias :| :union
68
+
69
+ def value=(string)
70
+ @value = string.force_encoding(Encoding::ASCII_8BIT)
71
+ @value.bytesize
72
+ end
73
+
74
+ def indexes=(array)
75
+ max_index = array.max
76
+ @value = "\x00" * (max_index.div(8) + 1)
77
+ array.each do |index|
78
+ set_at(index)
79
+ end
80
+ @value.bytesize
81
+ end
82
+
83
+ def indexes
84
+ indexes = []
85
+ position = 0
86
+ @value.each_byte do |c|
87
+ BITS_TABLE[c].each do |i|
88
+ indexes << (position*8 + i)
89
+ end
90
+ position += 1
91
+ end
92
+ indexes
93
+ end
94
+
95
+ def cardinality
96
+ Bitwise.population_count(self.value)
97
+ end
98
+
99
+ BITS_TABLE = (0..255).map do |i|
100
+ (0..7).map do |j|
101
+ j = 7 - j
102
+ ((i & 2**j) > 0) ? (7 - j) : nil
103
+ end.compact
104
+ end
105
+ end
Binary file
@@ -0,0 +1,96 @@
1
+ require 'bitwise'
2
+ require 'set'
3
+
4
+ describe Bitwise do
5
+ before do
6
+ @bitwise = Bitwise.new(1)
7
+ end
8
+
9
+ it "should set and clear" do
10
+ @bitwise.to_bits.should == '00000000'
11
+
12
+ @bitwise.set_at 1
13
+ @bitwise.set_at 2
14
+ @bitwise.set_at 3
15
+ @bitwise.set_at 5
16
+ @bitwise.to_bits.should == '01110100'
17
+ @bitwise.cardinality.should == 4
18
+
19
+ @bitwise.clear_at(1)
20
+ @bitwise.clear_at(3)
21
+ @bitwise.to_bits.should == '00100100'
22
+ @bitwise.cardinality.should == 2
23
+ end
24
+
25
+ it "should set by indexes" do
26
+ @bitwise.indexes = [1,10]
27
+ @bitwise.indexes.should == [1,10]
28
+ @bitwise.to_bits.should == '0100000000100000'
29
+ @bitwise.cardinality.should == 2
30
+ end
31
+
32
+ describe "benchmark" do
33
+ def assign_indexes(total, picks)
34
+ set = Set.new
35
+
36
+ picks.times do
37
+ set << (rand*total).to_i
38
+ end
39
+
40
+ @indexes = set.to_a
41
+ end
42
+
43
+ def measure
44
+ start = Time.now.to_f
45
+ yield
46
+ Time.now.to_f - start
47
+ end
48
+
49
+ it "indexes assignment" do
50
+ assign_indexes(1000, 10)
51
+ measure { @bitwise.indexes = @indexes }.should < 0.0001
52
+
53
+ assign_indexes(10_000, 100)
54
+ measure { @bitwise.indexes = @indexes }.should < 0.001
55
+
56
+ assign_indexes(100_000, 1000)
57
+ measure { @bitwise.indexes = @indexes }.should < 0.01
58
+
59
+ assign_indexes(1000_000, 10_000)
60
+ measure { @bitwise.indexes = @indexes }.should < 0.1
61
+ end
62
+
63
+ it "cardinality sparse" do
64
+ @bitwise.indexes = [1, 10, 100, 1000, 10_000, 100_000, 1000_000, 10_000_000, 100_000_000]
65
+ measure { @bitwise.cardinality.should == 9 }.should < 1.0
66
+ end
67
+
68
+ it "cardinality dense" do
69
+ assign_indexes(10_000_000, 100_000)
70
+ @bitwise.indexes = @indexes
71
+ measure { @bitwise.cardinality }.should < 1.0
72
+ end
73
+
74
+ it "union sparse" do
75
+ b1 = Bitwise.new
76
+ b2 = Bitwise.new
77
+ b1.indexes = [1, 10, 100, 1000, 10_000, 100_000, 1000_000, 10_000_000, 100_000_000]
78
+ b2.indexes = [2, 20, 200, 2000, 20_000, 200_000, 2000_000, 20_000_000, 200_000_000]
79
+ measure do
80
+ b3 = b1.union(b2)
81
+ b3.cardinality.should == 18
82
+ end.should < 1.0
83
+ end
84
+
85
+ it "intersect sparse" do
86
+ b1 = Bitwise.new
87
+ b2 = Bitwise.new
88
+ b1.indexes = [1, 10, 100, 1000, 10_000, 100_000, 1000_000, 10_000_000, 100_000_000]
89
+ b2.indexes = [2, 20, 200, 2000, 20_000, 200_000, 2000_000, 20_000_000, 200_000_000]
90
+ measure do
91
+ b3 = b1.intersect(b2)
92
+ b3.cardinality.should == 0
93
+ end.should < 1.0
94
+ end
95
+ end
96
+ end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bitwise
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kenn Ejima
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-13 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Fast, memory efficient bitwise operations on large binary strings
15
+ email: kenn.ejima@gmail.com
16
+ executables: []
17
+ extensions:
18
+ - ext/bitwise/extconf.rb
19
+ extra_rdoc_files:
20
+ - LICENSE.txt
21
+ - README.md
22
+ files:
23
+ - .document
24
+ - Gemfile
25
+ - LICENSE.txt
26
+ - README.md
27
+ - Rakefile
28
+ - VERSION
29
+ - bitwise.gemspec
30
+ - ext/bitwise/bitwise.c
31
+ - ext/bitwise/extconf.rb
32
+ - lib/bitwise.rb
33
+ - lib/bitwise/bitwise.bundle
34
+ - spec/bitwise_spec.rb
35
+ homepage: http://github.com/kenn/bitwise
36
+ licenses:
37
+ - MIT
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 1.8.10
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Fast, memory efficient bitwise operations on large binary strings
60
+ test_files: []