cantor 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +97 -0
- data/Rakefile +81 -0
- data/lib/cantor.rb +54 -0
- data/lib/cantor/absolute_set.rb +295 -0
- data/lib/cantor/abstract_set.rb +174 -0
- data/lib/cantor/null_set.rb +123 -0
- data/lib/cantor/relative_complement.rb +135 -0
- data/lib/cantor/relative_set.rb +267 -0
- data/lib/cantor/universal_set.rb +102 -0
- data/spec/examples/absolute_set.example +1513 -0
- data/spec/examples/null_set.example +2 -0
- data/spec/examples/relative_set.example +2 -0
- data/spec/examples/universal_set.example +1 -0
- data/spec/spec_helper.rb +30 -0
- metadata +81 -0
data/README.md
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# Cantor
|
2
|
+
|
3
|
+
Fast implementation of finite and complement sets in Ruby
|
4
|
+
|
5
|
+
## Constructors
|
6
|
+
|
7
|
+
* `Cantor.empty`
|
8
|
+
Finite set that contains no elements
|
9
|
+
|
10
|
+
* `Cantor.build(enum)`
|
11
|
+
Finite set containing each element in `enum`, whose domain of discourse is
|
12
|
+
unrestricted
|
13
|
+
|
14
|
+
* `Cantor.absolute(enum, universe)`
|
15
|
+
Finite set containing each element in `enum`, whose domain of discourse is
|
16
|
+
`universe`
|
17
|
+
|
18
|
+
* `Cantor.universal`
|
19
|
+
Infinite set containing every value in the universe
|
20
|
+
|
21
|
+
* `Cantor.complement(enum)`
|
22
|
+
Set containing every value except those in `enum`. Finite when `enum` is
|
23
|
+
infinite. Infinite when `enum` is finite
|
24
|
+
|
25
|
+
## Operations
|
26
|
+
|
27
|
+
* `xs.include?(x)`
|
28
|
+
* `xs.exclude?(x)`
|
29
|
+
* `xs.finite?`
|
30
|
+
* `xs.infinite?`
|
31
|
+
* `xs.empty?`
|
32
|
+
* `xs.size`
|
33
|
+
* `xs.replace(ys)`
|
34
|
+
* `~xs`
|
35
|
+
* `xs.complement`
|
36
|
+
* `xs + ys`
|
37
|
+
* `xs | ys`
|
38
|
+
* `xs.union(ys)`
|
39
|
+
* `xs - ys`
|
40
|
+
* `xs.difference(ys)`
|
41
|
+
* `xs ^ ys`
|
42
|
+
* `xs.symmetric_difference(ys)`
|
43
|
+
* `xs & ys`
|
44
|
+
* `xs.intersection(ys)`
|
45
|
+
* `xs <= ys`
|
46
|
+
* `xs.subset?(ys)`
|
47
|
+
* `xs < ys`
|
48
|
+
* `xs.proper_subset?(ys)`
|
49
|
+
* `xs >= ys`
|
50
|
+
* `xs.superset?(ys)`
|
51
|
+
* `xs > ys`
|
52
|
+
* `xs.proper_superset?(ys)`
|
53
|
+
* `xs.disjoint?(ys)`
|
54
|
+
* `xs == ys`
|
55
|
+
|
56
|
+
## Performance
|
57
|
+
|
58
|
+
Sets with a finite domain of discourse are represented using a bit string of
|
59
|
+
|U| bits, where |U| is the size of the domain. This provides nearly
|
60
|
+
O(1) constant-time implementation using bitwise operations for all of the above
|
61
|
+
set operations.
|
62
|
+
|
63
|
+
The bit string is represented as an Integer, but as the domain grows larger
|
64
|
+
than `0.size * 8 - 2` items, the type is automatically expanded to a Bignum.
|
65
|
+
Bitwise operations on Bignums are O(|U|), which is still significantly
|
66
|
+
faster than using the default Set library.
|
67
|
+
|
68
|
+
Sets with an unrestricted domain of discourse are implemented using a Hash.
|
69
|
+
Unary operations and membership tests are O(1) constant-time. Binary operations
|
70
|
+
on these sets perform about as well as those of the default Set library.
|
71
|
+
|
72
|
+
### Benchmarks
|
73
|
+
|
74
|
+
These benchmarks aren't intended to be useful. While they indicate the
|
75
|
+
worst-case performance for Cantor, they probably don't show the worst
|
76
|
+
case for the standard Set library.
|
77
|
+
|
78
|
+
<table>
|
79
|
+
<tbody>
|
80
|
+
<tr>
|
81
|
+
<td><img title="intersection" src="/kputnam/cantor/raw/master/benchmark/intersection.png"/></td>
|
82
|
+
<td><img title="difference" src="/kputnam/cantor/raw/master/benchmark/difference.png"/></td>
|
83
|
+
</tr>
|
84
|
+
<tr>
|
85
|
+
<td><img title="union" src="/kputnam/cantor/raw/master/benchmark/union.png"/></td>
|
86
|
+
<td><img title="symmetric difference" src="/kputnam/cantor/raw/master/benchmark/sdifference.png"/></td>
|
87
|
+
</tr>
|
88
|
+
<tr>
|
89
|
+
<td><img title="subset" src="/kputnam/cantor/raw/master/benchmark/subset.png"/></td>
|
90
|
+
<td><img title="superset" src="/kputnam/cantor/raw/master/benchmark/superset.png"/></td>
|
91
|
+
</tr>
|
92
|
+
<tr>
|
93
|
+
<td><img title="equality" src="/kputnam/cantor/raw/master/benchmark/equality.png"/></td>
|
94
|
+
<td><img title="membership" src="/kputnam/cantor/raw/master/benchmark/membership.png"/></td>
|
95
|
+
</tr>
|
96
|
+
</tbody>
|
97
|
+
</table>
|
data/Rakefile
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# Rake tasks for the gem: an IRB console, the RSpec suite, an rcov coverage
# run and YARD documentation. Each optional tool is required inside a
# begin/rescue; when it cannot be loaded, a placeholder task is defined that
# reports the load failure and exits with status 1.
#
# The load errors are captured in their own local variables rather than read
# from `$!` inside the task bodies: `$!` is only set while the rescue clause is
# running, so by the time a placeholder task executes it would be nil and the
# reason for the failure would be lost.
require "pathname"

# Directory containing this Rakefile, as an absolute path and relative to the
# current working directory.
abspath = Pathname.new(File.dirname(__FILE__)).expand_path
relpath = abspath.relative_path_from(Pathname.pwd)

# Bundler is optional; warn and carry on without it.
begin
  require "rubygems"
  require "bundler/setup"
rescue LoadError => bundler_error
  warn "couldn't load bundler:"
  warn " #{bundler_error}"
end

# Replaces the rake process with an IRB session that has lib/ on the load
# path and cantor already required.
task :console do
  exec(*%w(irb -I lib -r cantor))
end

# rake spec
begin
  require "rspec/core/rake_task"
  RSpec::Core::RakeTask.new do |t|
    t.verbose = false
    t.pattern = "#{relpath}/spec/examples/**/*.example"

    t.rspec_opts = %w(--color --format p)
    t.rspec_opts << "-I#{abspath}/spec"
  end
rescue LoadError => rspec_error
  task :spec do
    warn "couldn't load rspec"
    warn " #{rspec_error}"
    exit 1
  end
end

# rake rcov (needs both rcov and rspec)
begin
  require "rcov"
  begin
    require "rspec/core/rake_task"
    RSpec::Core::RakeTask.new(:rcov) do |t|
      t.rcov = true
      t.rcov_opts = "--exclude spec/,gems/,00401"

      t.verbose = false
      t.pattern = "#{relpath}/spec/examples/**/*.example"

      t.rspec_opts = %w(--color --format p)
      t.rspec_opts << "-I#{abspath}/spec"
    end
  rescue LoadError => rcov_rspec_error
    task :rcov do
      warn "couldn't load rspec"
      warn " #{rcov_rspec_error}"
      exit 1
    end
  end
rescue LoadError => rcov_error
  task :rcov do
    warn "couldn't load rcov:"
    warn " #{rcov_error}"
    exit 1
  end
end

# rake yard
begin
  require "yard"

  # Note options are loaded from .yardopts
  YARD::Rake::YardocTask.new(:yard => :clobber_yard)

  # Empties the generated documentation directory and recreates its images
  # subdirectory before yard runs.
  task :clobber_yard do
    rm_rf "#{relpath}/doc/generated"
    mkdir_p "#{relpath}/doc/generated/images"
  end
rescue LoadError => yard_error
  task :yard do
    warn "couldn't load yard:"
    warn " #{yard_error}"
    exit 1
  end
end

task :default => :spec
|
data/lib/cantor.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# Namespace for the set implementations. Each class is registered with
# autoload, so its file is only required the first time the constant is used.
module Cantor
  autoload :AbstractSet,        "cantor/abstract_set"
  autoload :AbsoluteSet,        "cantor/absolute_set"
  autoload :NullSet,            "cantor/null_set"
  autoload :RelativeComplement, "cantor/relative_complement"
  autoload :RelativeSet,        "cantor/relative_set"
  autoload :UniversalSet,       "cantor/universal_set"
end

class << Cantor
  # @group Constructors
  ###########################################################################

  # Coerces `object` into a set. An existing {Cantor::AbstractSet} is returned
  # as-is; any other Enumerable is handed to {Cantor::RelativeSet.build}.
  #
  # @param object [Cantor::AbstractSet, Enumerable]
  # @return [Cantor::AbstractSet]
  # @raise [TypeError] when `object` is neither an AbstractSet nor Enumerable
  def build(object)
    if object.is_a?(Cantor::AbstractSet)
      object
    elsif object.is_a?(Enumerable)
      Cantor::RelativeSet.build(object)
    else
      raise TypeError,
        "argument must be an AbstractSet or Enumerable"
    end
  end

  # Builds a set from `other` (see {#build}) and returns its complement.
  #
  # @param other [Cantor::AbstractSet, Enumerable]
  # @return [Cantor::AbstractSet]
  def complement(other)
    build(other).complement
  end

  # @return [Cantor::UniversalSet]
  def universal
    Cantor::UniversalSet.build
  end

  # @return [Cantor::NullSet]
  def empty
    Cantor::NullSet.build
  end

  # Builds a set of the elements in `other` whose domain of discourse is
  # `universe`. With no explicit universe, `other` is its own universe.
  #
  # Passing either the {Cantor::UniversalSet} class or an instance of it as
  # `universe` means the domain is unrestricted, so the result comes from
  # {#build} instead of enumerating the universe. (Previously only the class
  # itself was recognised and an instance was passed to
  # {Cantor::AbsoluteSet.build}, which enumerates its argument.)
  #
  # @param other [Enumerable] elements of the resulting set
  # @param universe [Enumerable, Cantor::UniversalSet, Class]
  # @return [Cantor::AbstractSet]
  def absolute(other, universe = other)
    if universe == Cantor::UniversalSet || universe.is_a?(Cantor::UniversalSet)
      build(other)
    elsif universe.eql?(other)
      Cantor::AbsoluteSet.build(universe)
    else
      Cantor::AbsoluteSet.build(universe).intersection(other)
    end
  end

  # @endgroup
  ###########################################################################
end
|
@@ -0,0 +1,295 @@
|
|
1
|
+
module Cantor
|
2
|
+
|
3
|
+
#
|
4
|
+
# {AbsoluteSet} is a subset of a finite, fully-enumerated universe set. This
|
5
|
+
# means every possible value that can belong to the {AbsoluteSet} must
|
6
|
+
# already belong to the universe set, which is a _finite_ collection.
|
7
|
+
#
|
8
|
+
# This implementation is fairly efficient when computing set operations on
|
9
|
+
# two sets from the same universe, especially compared to {RelativeSet}.
|
10
|
+
# Efficiency is achieved by encoding each element in the universe's
|
11
|
+
# membership to the specific subset as a bitmask. Operations can then be
|
12
|
+
# performed using bitwise operations, instead of using operations on a Hash.
|
13
|
+
#
|
14
|
+
# This data type is not suitable for sets whose elements belong to a
|
15
|
+
# huge universe of possible values, as each set requires `|U|` bits of
|
16
|
+
# storage where `|U|` is the size of the universe. Operations on sets that
|
17
|
+
# belong to different universes do not currently attempt to merge the two
|
18
|
+
# universe sets, as this is probably a better use case for {RelativeSet}.
|
19
|
+
#
|
20
|
+
class AbsoluteSet < AbstractSet
|
21
|
+
include Enumerable
|
22
|
+
|
23
|
+
# @return [Integer]
|
24
|
+
attr_reader :mask
|
25
|
+
|
26
|
+
# @return [Hash]
|
27
|
+
attr_reader :universe
|
28
|
+
|
29
|
+
# Stores the membership bitmask and the universe it indexes into.
#
# @param mask [Integer] bit `n` is 1 when the universe member mapped to
#   position `n` belongs to this set
# @param universe [Hash] maps each possible element to its bit position;
#   the given object itself is frozen, not a copy of it
def initialize(mask, universe)
  @mask = mask
  @universe = universe.freeze
end
|
32
|
+
|
33
|
+
# Creates a new {AbsoluteSet} that reuses this set's mask and universe unless
# replacements are supplied.
#
# @private
# @param changes [Hash] optional `:mask` and/or `:universe` overrides
# @return [AbsoluteSet]
def copy(changes = {})
  mask     = changes.fetch(:mask, @mask)
  universe = changes.fetch(:universe, @universe)

  AbsoluteSet.new(mask, universe)
end
|
40
|
+
|
41
|
+
# Returns a single element from the set, with no guarantees about which
# element. Returns nil when the set is {#empty?}; previously the universe
# Hash leaked out in that case because it is the return value of Hash#each.
#
# @return [Object, nil]
def first
  @universe.each do |value, n|
    # @mask[n] is the n-th bit of the mask: 1 when value is a member
    return value unless @mask[n].zero?
  end

  nil
end
|
50
|
+
|
51
|
+
# Yields each element in the set to the implicit block argument, in the
# iteration order of the universe Hash. Without a block, an Enumerator over
# the same elements is returned instead of raising LocalJumpError.
#
# @yieldparam value [Object] an element of the set
# @return [void, Enumerator]
def each
  return enum_for(:each) unless block_given?

  @universe.each do |value, n|
    # Skip universe members whose bit is not set in the mask
    yield(value) unless @mask[n].zero?
  end
end
|
61
|
+
|
62
|
+
# (see AbstractSet#finite?)
#
# Unconditionally true for this class.
#
# @return [true]
def finite?
  true
end
|
66
|
+
|
67
|
+
# (see AbstractSet#empty?)
#
# The set has no members exactly when no bit of the mask is set.
#
# @return [Boolean]
def empty?
  @mask == 0
end
|
71
|
+
|
72
|
+
# (see AbstractSet#size)
#
# Adds up the mask bit (0 or 1) at every position the universe assigns to an
# element, so the cost grows with the size of the universe.
#
# @return [Integer]
def size
  total = 0
  @universe.each_value { |position| total += @mask[position] }
  total
end
|
76
|
+
|
77
|
+
# (see AbstractSet#replace)
#
# An {AbstractSet} argument is returned unchanged. Anything else is converted
# to a mask over this set's universe in strict mode, so an element outside
# the universe raises ArgumentError (see #as_mask).
#
# @return [AbstractSet]
def replace(other)
  return other if other.is_a?(AbstractSet)

  copy(:mask => as_mask(other, true))
end
|
85
|
+
|
86
|
+
# (see AbstractSet#include?)
#
# Always answers with true or false; elements that are not part of the
# universe yield false (this used to fall through and return nil).
#
# @return [Boolean]
def include?(element)
  position = @universe.fetch(element, false)
  return false unless position

  # Same as (@mask & (1 << position)).zero? but potentially eliminates
  # converting the intermediate computation to a Ruby value
  !@mask[position].zero?
end
|
94
|
+
|
95
|
+
# @group Set Operations
|
96
|
+
#########################################################################
|
97
|
+
|
98
|
+
# Builds the set of block results for every element of this set. Each result
# must itself belong to the universe.
#
# @yieldparam value [Object] an element of the set
# @yieldreturn [Object] the element to put in the resulting set
# @return [AbsoluteSet]
# @raise [RuntimeError] when the block returns a value outside the universe
def map
  image_mask = @universe.inject(0) do |bits, (value, n)|
    # Only current members are transformed
    next bits if @mask[n].zero?

    image    = yield(value)
    position = @universe.fetch(image, false)

    unless position
      raise "universe does not contain element #{image.inspect}"
    end

    bits | (1 << position)
  end

  copy(:mask => image_mask)
end
|
116
|
+
|
117
|
+
# Keeps the elements of this set for which the block returns a truthy value.
# The block is only called for current members.
#
# @yieldparam value [Object] an element of the set
# @return [AbsoluteSet]
def select
  kept = 0

  @universe.each do |value, n|
    kept |= (1 << n) if @mask[n] == 1 && yield(value)
  end

  copy(:mask => kept)
end
|
129
|
+
|
130
|
+
# Drops the elements of this set for which the block returns a truthy value.
# The block is only called for current members.
#
# @yieldparam value [Object] an element of the set
# @return [AbsoluteSet]
def reject
  kept = 0

  @universe.each do |value, n|
    kept |= (1 << n) if @mask[n] == 1 && !yield(value)
  end

  copy(:mask => kept)
end
|
142
|
+
|
143
|
+
# (see AbstractSet#complement)
#
# Flips every bit of the mask, then clips the result to one bit per universe
# entry; without the clip, `~@mask` would be a negative Integer.
#
# @return [AbsoluteSet]
def complement
  universe_bits = (1 << @universe.size) - 1

  copy(:mask => ~@mask & universe_bits)
end
|
147
|
+
|
148
|
+
# (see AbstractSet#union)
#
# Three cases, tried in order:
# * another {AbsoluteSet} over an `eql?` universe: bitwise OR of the masks
# * an infinite {AbstractSet}: delegated to `other.union(self)`
# * anything else: converted with #as_mask in strict mode, which raises
#   ArgumentError for elements outside this universe
#
# @return [AbstractSet]
def union(other)
  if other.is_a?(AbsoluteSet) && other.universe.eql?(@universe)
    return copy(:mask => @mask | other.mask)
  end

  return other.union(self) if other.is_a?(AbstractSet) && other.infinite?

  copy(:mask => @mask | as_mask(other, true))
end
|
158
|
+
|
159
|
+
# (see AbstractSet#intersection)
#
# Three cases, tried in order:
# * another {AbsoluteSet} over an `eql?` universe: bitwise AND of the masks
# * an infinite {AbstractSet}: delegated to `other.intersection(self)`
# * anything else: converted with #as_mask in non-strict mode, so elements
#   outside this universe are ignored
#
# @return [AbstractSet]
def intersection(other)
  if other.is_a?(AbsoluteSet) && other.universe.eql?(@universe)
    return copy(:mask => @mask & other.mask)
  end

  return other.intersection(self) if other.is_a?(AbstractSet) && other.infinite?

  copy(:mask => @mask & as_mask(other, false))
end
|
169
|
+
|
170
|
+
# (see AbstractSet#difference)
#
# Three cases, tried in order:
# * another {AbsoluteSet} over an `eql?` universe: clears the other's bits
# * an infinite {AbstractSet}: computed as `intersection(other.complement)`
# * anything else: converted with #as_mask in non-strict mode, so elements
#   outside this universe are ignored
#
# @return [AbstractSet]
def difference(other)
  if other.is_a?(AbsoluteSet) && other.universe.eql?(@universe)
    return copy(:mask => @mask & ~other.mask)
  end

  return intersection(other.complement) if other.is_a?(AbstractSet) && other.infinite?

  copy(:mask => @mask & ~as_mask(other, false))
end
|
180
|
+
|
181
|
+
# (see AbstractSet#symmetric_difference)
#
# Three cases, tried in order:
# * another {AbsoluteSet} over an `eql?` universe: bitwise XOR of the masks
# * an infinite {AbstractSet}: delegated to
#   `other.symmetric_difference(self)`
# * anything else: converted with #as_mask in strict mode, which raises
#   ArgumentError for elements outside this universe
#
# @return [AbstractSet]
def symmetric_difference(other)
  if other.is_a?(AbsoluteSet) && other.universe.eql?(@universe)
    return copy(:mask => @mask ^ other.mask)
  end

  if other.is_a?(AbstractSet) && other.infinite?
    return other.symmetric_difference(self)
  end

  copy(:mask => @mask ^ as_mask(other, true))
end
|
191
|
+
|
192
|
+
# @group Set Ordering
|
193
|
+
#########################################################################
|
194
|
+
|
195
|
+
# (see AbstractSet#==)
#
# * Another {AbsoluteSet} over an `eql?` universe is equal when the masks
#   are equal.
# * An infinite {AbstractSet} is never equal to this set.
# * Any other Enumerable is equal when it maps to the same mask (elements
#   outside the universe are ignored by the non-strict #as_mask) and reports
#   the same size, which rules out extra or repeated elements.
# * Everything else is unequal. This case used to fall off the end of the
#   conditional and return nil; it now returns false.
#
# @return [Boolean]
def ==(other)
  if other.is_a?(AbsoluteSet) && other.universe.eql?(@universe)
    @mask == other.mask
  elsif other.is_a?(AbstractSet) && other.infinite?
    false
  elsif other.is_a?(Enumerable)
    @mask == as_mask(other, false) && size == other.size
  else
    false
  end
end
|
205
|
+
|
206
|
+
# @group Pretty Printing
|
207
|
+
#########################################################################
|
208
|
+
|
209
|
+
# Prints the set as `AbsoluteSet[size/universe size](…)`, listing at most
# five elements followed by `...` when there are more.
#
# @param q [PP] the pretty printer to write to
# @return [void]
def pretty_print(q)
  q.text("AbsoluteSet[#{size}/#{@universe.size}]")

  q.group(2, "(", ")") do
    q.breakable ""

    members = to_a

    members.first(5).each do |member|
      # Separate every element but the first from its predecessor
      unless q.current_group.first?
        q.text ","
        q.breakable
      end

      q.pp member
    end

    if members.length > 5
      q.text ","
      q.breakable
      q.text "..."
    end
  end
end
|
231
|
+
|
232
|
+
# @return [String]
|
233
|
+
def inspect
|
234
|
+
"AbsoluteSet(#{to_a.map(&:inspect).join(', ')})"
|
235
|
+
end
|
236
|
+
|
237
|
+
# @endgroup
|
238
|
+
#########################################################################
|
239
|
+
|
240
|
+
private
|
241
|
+
|
242
|
+
# Converts `other` into a bitmask over this set's universe.
#
# @param other [AbstractSet, #each] the elements to encode
# @param strict [Boolean] when true, raise if `other` has an element that
#   is not part of the universe; when false, such elements are ignored
# @return [Integer]
# @raise [ArgumentError] in strict mode, when `other` is not a subset of
#   the universe
def as_mask(other, strict)
  bits = 0

  if other.is_a?(AbstractSet) && @universe.size < other.size
    # The universe is the smaller collection, so probe other once per
    # universe member instead of walking other
    matched = 0

    @universe.each do |value, n|
      next unless other.include?(value)

      bits    |= (1 << n)
      matched += 1
    end

    if strict && matched < other.size
      # other is not a subset of @universe
      raise ArgumentError,
        "universe does not contain all elements from #{other.inspect}"
    end
  else
    # We might land here if other is an Array, since it's probably
    # much worse to repeatedly call Array#include? than it is to
    # iterate the entire Array only once
    other.each do |x|
      n = @universe.fetch(x, false)

      if n
        bits |= (1 << n)
      elsif strict
        raise ArgumentError,
          "universe does not contain element #{x.inspect}"
      end
    end
  end

  bits
end
|
276
|
+
|
277
|
+
end
|
278
|
+
|
279
|
+
class << AbsoluteSet
|
280
|
+
# @group Constructors
|
281
|
+
#########################################################################
|
282
|
+
|
283
|
+
# Builds the set that contains every value in `values`, using those same
# values as its universe.
#
# Each distinct value gets the next free bit position, in order of first
# appearance. Repeated values are ignored: before, a repeat consumed a bit
# position without adding a universe entry, so the mask had more bits than
# the universe had members and no longer agreed with masks computed from
# `@universe.size` (as #complement does).
#
# @param values [#each] the members of the universe
# @return [AbsoluteSet] the full set over that universe
def build(values)
  universe = {}

  values.each do |value|
    universe[value] = universe.size unless universe.key?(value)
  end

  # One bit per universe member, all set
  new((1 << universe.size) - 1, universe)
end
|
290
|
+
|
291
|
+
# @endgroup
|
292
|
+
#########################################################################
|
293
|
+
end
|
294
|
+
|
295
|
+
end
|