bitwise 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/Gemfile ADDED
@@ -0,0 +1 @@
1
# Fetch gems over TLS; a plain-http source can be tampered with in transit.
source "https://rubygems.org"
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Kenn Ejima
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,69 @@
1
+ Bitwise
2
+ =======
3
+
4
+ Fast, memory efficient bitwise operations on large binary strings.
5
+
6
+ Internally a bit array is represented as a ruby string with `Encoding::ASCII_8BIT` encoding, which keeps billions of bits in a workable footprint.
7
+
8
+ * 1,000,000 bits = 125KB
9
+ * 10,000,000 bits = 1.25MB
10
+ * 100,000,000 bits = 12.5MB
11
+ * 1,000,000,000 bits = 125MB
12
+
13
+ Install
14
+ -------
15
+
16
+ gem install bitwise
17
+
18
+ Usage
19
+ -----
20
+
21
+ Bitwise assignment and retrieval:
22
+
23
+ ```ruby
24
+ b = Bitwise.new(1)
25
+
26
+ b.to_bits
27
+ => "00000000"
28
+
29
+ b.set_at(1)
30
+ b.set_at(4)
31
+
32
+ b.to_bits
33
+ => "01001000"
34
+
35
+ b.clear_at(1)
36
+
37
+ b.to_bits
38
+ => "00001000"
39
+ ```
40
+
41
+ Index assignment and retrieval:
42
+
43
+ ```ruby
44
+ b = Bitwise.new
45
+ b.indexes = [1, 10, 100]
46
+
47
+ b.to_bits
48
+ => "01000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000"
49
+
50
+ b.cardinality
51
+ => 3
52
+
53
+ b.size
54
+ => 13
55
+
56
+ b.set_at(20)
57
+
58
+ b.to_bits
59
+ => "01000000001000000000100000000000000000000000000000000000000000000000000000000000000000000000000000001000"
60
+
61
+ b.value.unpack('C*')
62
+ => [64, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8]
63
+
64
+ b.cardinality
65
+ => 4
66
+
67
+ b.indexes
68
+ => [1, 10, 20, 100]
69
+ ```
data/Rakefile ADDED
@@ -0,0 +1,29 @@
1
+ # require "bundler/gem_tasks"
2
+ # require 'rubygems'
3
+ # require 'bundler'
4
+
5
require "rake/extensiontask"
# Compile ext/bitwise and copy the result into lib/bitwise, which is where
# lib/bitwise.rb looks for it (`require "bitwise/bitwise"`).
Rake::ExtensionTask.new("bitwise") do |extension|
  extension.lib_dir = "lib/bitwise"
end

require 'rspec/core/rake_task'
task :default => :spec
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = ["--color"]
  # Failing examples are reported but do not make the rake run itself fail.
  t.fail_on_error = false
end
# The specs load the native extension, so build it before running them.
task :spec => :compile

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "bitwise"
  gem.homepage = "http://github.com/kenn/bitwise"
  gem.license = "MIT"
  gem.summary = %Q{Fast, memory efficient bitwise operations on large binary strings}
  gem.description = %Q{Fast, memory efficient bitwise operations on large binary strings}
  gem.email = "kenn.ejima@gmail.com"
  gem.authors = ["Kenn Ejima"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
data/bitwise.gemspec ADDED
@@ -0,0 +1,49 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "bitwise"
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Kenn Ejima"]
12
+ s.date = "2011-12-13"
13
+ s.description = "Fast, memory efficient bitwise operations on large binary strings"
14
+ s.email = "kenn.ejima@gmail.com"
15
+ s.extensions = ["ext/bitwise/extconf.rb"]
16
+ s.extra_rdoc_files = [
17
+ "LICENSE.txt",
18
+ "README.md"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ "Gemfile",
23
+ "LICENSE.txt",
24
+ "README.md",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "bitwise.gemspec",
28
+ "ext/bitwise/bitwise.c",
29
+ "ext/bitwise/extconf.rb",
30
+ "lib/bitwise.rb",
31
+ "lib/bitwise/bitwise.bundle",
32
+ "spec/bitwise_spec.rb"
33
+ ]
34
+ s.homepage = "http://github.com/kenn/bitwise"
35
+ s.licenses = ["MIT"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = "1.8.10"
38
+ s.summary = "Fast, memory efficient bitwise operations on large binary strings"
39
+
40
+ if s.respond_to? :specification_version then
41
+ s.specification_version = 3
42
+
43
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
44
+ else
45
+ end
46
+ else
47
+ end
48
+ end
49
+
@@ -0,0 +1,61 @@
1
+ #include "ruby.h"
2
+
3
+ VALUE BitwiseClass;
4
+
5
/*
 * Population-count lookup table: COUNT_TABLE[b] is the number of 1 bits in
 * the byte value b (0..255). Declared const because it is never written to.
 */
const unsigned char COUNT_TABLE[256] = {
  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
23
+
24
+ static VALUE bw_string_union(VALUE self, VALUE max, VALUE min)
25
+ {
26
+ VALUE result = rb_str_new(RSTRING_PTR(max), RSTRING_LEN(max));
27
+ int i;
28
+ int min_len = RSTRING_LEN(min);
29
+ for (i = 0; i < RSTRING_LEN(max); i++) {
30
+ RSTRING_PTR(result)[i] |= ((i < min_len) ? RSTRING_PTR(min)[i] : 0);
31
+ }
32
+ return result;
33
+ }
34
+
35
+ static VALUE bw_string_intersect(VALUE self, VALUE max, VALUE min)
36
+ {
37
+ VALUE result = rb_str_new(RSTRING_PTR(min), RSTRING_LEN(min));
38
+ int i;
39
+ for (i = 0; i < RSTRING_LEN(min); i++) {
40
+ RSTRING_PTR(result)[i] &= RSTRING_PTR(max)[i];
41
+ }
42
+ return result;
43
+ }
44
+
45
+ static VALUE bw_population_count(VALUE self, VALUE str) {
46
+ int count, i;
47
+ unsigned char *buffer = RSTRING_PTR(str);
48
+ count = 0;
49
+ for (i = 0; i < RSTRING_LEN(str); i++) {
50
+ count += COUNT_TABLE[buffer[i]];
51
+ }
52
+ return INT2NUM(count);
53
+ }
54
+
55
+ void Init_bitwise()
56
+ {
57
+ BitwiseClass = rb_define_class("Bitwise", rb_cObject);
58
+ rb_define_singleton_method(BitwiseClass, "string_union", bw_string_union, 2);
59
+ rb_define_singleton_method(BitwiseClass, "string_intersect", bw_string_intersect, 2);
60
+ rb_define_singleton_method(BitwiseClass, "population_count", bw_population_count, 1);
61
+ }
@@ -0,0 +1,2 @@
1
require 'mkmf'
# Install the compiled library as bitwise/bitwise.<dlext> so that it matches
# `require "bitwise/bitwise"` in lib/bitwise.rb. The init function name is
# still taken from the basename (Init_bitwise).
create_makefile("bitwise/bitwise")
data/lib/bitwise.rb ADDED
@@ -0,0 +1,105 @@
1
+ # coding: ascii-8bit
2
+
3
+ require "bitwise/bitwise"
4
+
5
# A bit array stored in a binary (ASCII-8BIT) Ruby string.
#
# Bits are numbered most-significant-bit first: index 0 is the highest bit of
# the first byte, index 7 its lowest bit, index 8 the highest bit of the
# second byte, and so on.
class Bitwise
  # BITS_TABLE[byte] lists the positions (0..7, most significant bit first)
  # of the bits that are set in +byte+.
  BITS_TABLE = (0..255).map do |byte|
    (0..7).select { |position| byte[7 - position] == 1 }.freeze
  end.freeze

  # The underlying binary string.
  attr_reader :value

  # @param size [Integer] number of zero bytes (not bits) to start with
  def initialize(size = 0)
    @value = blank(size)
  end

  # @return [Integer] size of the bit array in bytes
  def size
    @value.bytesize
  end
  # NOTE(review): to_s returns the byte count (an Integer), which looks
  # unintended; kept as-is so existing callers see no change.
  alias :to_s :size

  # @return [String] the bits as a string of "0"/"1" characters
  def to_bits
    @value.unpack('B*').first
  end

  # Sets the bit at +index+ to 1.
  # @raise [IndexError] if +index+ lies outside the current value
  def set_at(index)
    fetch_byte(index)
    @value.setbyte(@div, @byte | bitmask)
  end

  # Sets the bit at +index+ to 0.
  # @raise [IndexError] if +index+ lies outside the current value
  def clear_at(index)
    fetch_byte(index)
    @value.setbyte(@div, @byte & ~bitmask)
  end

  # @return [Boolean] true when the bit at +index+ is 1
  def set_at?(index)
    get_bit(index) == 1
  end

  # @return [Boolean] true when the bit at +index+ is 0
  def clear_at?(index)
    get_bit(index) == 0
  end

  # @return [Integer] 1 or 0, the bit stored at +index+
  # @raise [IndexError] if +index+ lies outside the current value
  def get_bit(index)
    fetch_byte(index)
    (@byte & bitmask).zero? ? 0 : 1
  end

  # Mask selecting, within a byte, the bit most recently located by get_byte.
  def bitmask
    1 << (7 - @mod)
  end

  # Locates the byte holding bit +index+, remembering its byte offset and bit
  # position for bitmask and the setters.
  # @return [Integer, nil] the byte, or nil when +index+ is out of range
  def get_byte(index)
    @div, @mod = index.divmod(8)
    @byte = @value.getbyte(@div)
  end

  # @param other [Bitwise]
  # @return [Bitwise] a new bit array containing the bits set in both
  def intersect(other)
    min, max = [value, other.value].sort_by(&:bytesize)
    result = Bitwise.new
    result.value = Bitwise.string_intersect(max, min)
    result
  end
  alias :& :intersect

  # @param other [Bitwise]
  # @return [Bitwise] a new bit array containing the bits set in either
  def union(other)
    min, max = [value, other.value].sort_by(&:bytesize)
    result = Bitwise.new
    result.value = Bitwise.string_union(max, min)
    result
  end
  alias :| :union

  # Replaces the underlying string. A string that is already binary is used
  # as-is; any other string is copied before being re-tagged, so the caller's
  # object keeps its encoding and frozen strings are accepted.
  def value=(string)
    @value =
      if string.encoding == Encoding::ASCII_8BIT
        string
      else
        string.dup.force_encoding(Encoding::ASCII_8BIT)
      end
    @value.bytesize
  end

  # Resets the bit array so that exactly the given bit indexes are set.
  # The value is sized to hold the largest index; an empty array yields an
  # empty value.
  # @param array [Array<Integer>]
  def indexes=(array)
    max_index = array.max
    @value = blank(max_index ? max_index.div(8) + 1 : 0)
    array.each { |index| set_at(index) }
    @value.bytesize
  end

  # @return [Array<Integer>] indexes of all set bits, in ascending order
  def indexes
    found = []
    @value.each_byte.with_index do |byte, position|
      offset = position * 8
      BITS_TABLE[byte].each { |bit| found << (offset + bit) }
    end
    found
  end

  # @return [Integer] number of set bits
  def cardinality
    Bitwise.population_count(value)
  end

  private

  # Builds a binary string of +bytes+ zero bytes, independent of the source
  # file's own encoding.
  def blank(bytes)
    ("\x00" * bytes).force_encoding(Encoding::ASCII_8BIT)
  end

  # Like get_byte, but raises a descriptive IndexError for an out-of-range
  # index instead of handing nil to the bit arithmetic.
  def fetch_byte(index)
    get_byte(index) ||
      raise(IndexError, "bit index #{index} out of range (#{size * 8} bits)")
  end
end
Binary file
@@ -0,0 +1,96 @@
1
+ require 'bitwise'
2
+ require 'set'
3
+
4
describe Bitwise do
  before do
    @bitwise = Bitwise.new(1)
  end

  it "should set and clear" do
    @bitwise.to_bits.should == '00000000'

    @bitwise.set_at 1
    @bitwise.set_at 2
    @bitwise.set_at 3
    @bitwise.set_at 5
    @bitwise.to_bits.should == '01110100'
    @bitwise.cardinality.should == 4

    @bitwise.clear_at(1)
    @bitwise.clear_at(3)
    @bitwise.to_bits.should == '00100100'
    @bitwise.cardinality.should == 2
  end

  it "should set by indexes" do
    @bitwise.indexes = [1,10]
    @bitwise.indexes.should == [1,10]
    @bitwise.to_bits.should == '0100000000100000'
    @bitwise.cardinality.should == 2
  end

  describe "benchmark" do
    # Stores up to +picks+ distinct random indexes below +total+ in @indexes.
    # The Set drops duplicate draws, so fewer than +picks+ may remain.
    def assign_indexes(total, picks)
      set = Set.new

      picks.times do
        set << rand(total)
      end

      @indexes = set.to_a
    end

    # Returns the wall-clock seconds spent in the given block.
    def measure
      start = Time.now.to_f
      yield
      Time.now.to_f - start
    end

    it "indexes assignment" do
      assign_indexes(1000, 10)
      measure { @bitwise.indexes = @indexes }.should < 0.0001

      assign_indexes(10_000, 100)
      measure { @bitwise.indexes = @indexes }.should < 0.001

      assign_indexes(100_000, 1000)
      measure { @bitwise.indexes = @indexes }.should < 0.01

      assign_indexes(1_000_000, 10_000)
      measure { @bitwise.indexes = @indexes }.should < 0.1
    end

    it "cardinality sparse" do
      @bitwise.indexes = [1, 10, 100, 1000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]
      measure { @bitwise.cardinality.should == 9 }.should < 1.0
    end

    it "cardinality dense" do
      assign_indexes(10_000_000, 100_000)
      @bitwise.indexes = @indexes
      count = nil
      measure { count = @bitwise.cardinality }.should < 1.0
      # Every entry of @indexes is distinct, so each one is exactly one set bit.
      count.should == @indexes.size
    end

    it "union sparse" do
      b1 = Bitwise.new
      b2 = Bitwise.new
      b1.indexes = [1, 10, 100, 1000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]
      b2.indexes = [2, 20, 200, 2000, 20_000, 200_000, 2_000_000, 20_000_000, 200_000_000]
      measure do
        b3 = b1.union(b2)
        b3.cardinality.should == 18
      end.should < 1.0
    end

    it "intersect sparse" do
      b1 = Bitwise.new
      b2 = Bitwise.new
      b1.indexes = [1, 10, 100, 1000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000]
      b2.indexes = [2, 20, 200, 2000, 20_000, 200_000, 2_000_000, 20_000_000, 200_000_000]
      measure do
        b3 = b1.intersect(b2)
        b3.cardinality.should == 0
      end.should < 1.0
    end
  end
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bitwise
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kenn Ejima
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-13 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Fast, memory efficient bitwise operations on large binary strings
15
+ email: kenn.ejima@gmail.com
16
+ executables: []
17
+ extensions:
18
+ - ext/bitwise/extconf.rb
19
+ extra_rdoc_files:
20
+ - LICENSE.txt
21
+ - README.md
22
+ files:
23
+ - .document
24
+ - Gemfile
25
+ - LICENSE.txt
26
+ - README.md
27
+ - Rakefile
28
+ - VERSION
29
+ - bitwise.gemspec
30
+ - ext/bitwise/bitwise.c
31
+ - ext/bitwise/extconf.rb
32
+ - lib/bitwise.rb
33
+ - lib/bitwise/bitwise.bundle
34
+ - spec/bitwise_spec.rb
35
+ homepage: http://github.com/kenn/bitwise
36
+ licenses:
37
+ - MIT
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ requirements: []
55
+ rubyforge_project:
56
+ rubygems_version: 1.8.10
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Fast, memory efficient bitwise operations on large binary strings
60
+ test_files: []