immutable_set 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +31 -0
- data/.rspec +3 -0
- data/.travis.yml +10 -0
- data/BENCHMARK.md +131 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +83 -0
- data/Rakefile +85 -0
- data/bin/console +18 -0
- data/bin/setup +8 -0
- data/ext/immutable_set/extconf.rb +7 -0
- data/ext/immutable_set/immutable_set.c +445 -0
- data/immutable_set.gemspec +33 -0
- data/lib/immutable_set.rb +50 -0
- data/lib/immutable_set/builder_methods.rb +60 -0
- data/lib/immutable_set/disable_mutating_methods.rb +12 -0
- data/lib/immutable_set/inversion.rb +13 -0
- data/lib/immutable_set/native_ext.rb +19 -0
- data/lib/immutable_set/pure.rb +5 -0
- data/lib/immutable_set/ruby_fallback.rb +148 -0
- data/lib/immutable_set/stdlib_set_method_overrides.rb +155 -0
- data/lib/immutable_set/version.rb +3 -0
- metadata +137 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
2
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
|
+
require 'set'
|
4
|
+
require 'immutable_set/version'
|
5
|
+
|
6
|
+
# Gem specification for immutable_set.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name    = 'immutable_set'
  spec.version = ImmutableSet::VERSION
  spec.authors = ['Janosch Müller']
  spec.email   = ['janosch84@gmail.com']

  spec.summary  = "A faster, immutable replacement for Ruby's Set"
  spec.homepage = 'https://github.com/janosch-x/immutable_set'
  spec.license  = 'MIT'

  # Package every git-tracked file except benchmark and test directories.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject do |path|
    path.match(%r{^(benchmarks|test|spec|features)/})
  end
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # The C extension is only registered when the platform string does not
  # mention java.
  unless RUBY_PLATFORM =~ /java/i
    spec.extensions = %w[ext/immutable_set/extconf.rb]
  end

  spec.required_ruby_version = '>= 2.0.0'

  # Development-only dependencies, registered in the listed order.
  {
    'benchmark-ips' => '~> 2.7',
    'bundler'       => '~> 1.16',
    'rake'          => '~> 10.0',
    'rake-compiler' => '~> 1.0',
    'rspec'         => '~> 3.0'
  }.each do |gem_name, requirement|
    spec.add_development_dependency gem_name, requirement
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'immutable_set/builder_methods'
|
3
|
+
require 'immutable_set/native_ext'
|
4
|
+
require 'immutable_set/disable_mutating_methods'
|
5
|
+
require 'immutable_set/inversion'
|
6
|
+
require 'immutable_set/pure'
|
7
|
+
require 'immutable_set/ruby_fallback'
|
8
|
+
require 'immutable_set/stdlib_set_method_overrides'
|
9
|
+
require 'immutable_set/version'
|
10
|
+
|
11
|
+
class ImmutableSet < Set
|
12
|
+
attr_reader :max
|
13
|
+
|
14
|
+
def initialize(arg = nil)
|
15
|
+
@hash = Hash.new(false)
|
16
|
+
|
17
|
+
if arg.is_a?(ImmutableSet)
|
18
|
+
@hash = arg.instance_variable_get(:@hash)
|
19
|
+
@max = arg.max
|
20
|
+
elsif arg.is_a?(Range)
|
21
|
+
self.class.send(:feed_range_to_hash, arg, @hash)
|
22
|
+
@max = arg.max
|
23
|
+
elsif arg.respond_to?(:to_a)
|
24
|
+
sorted_arg = arg.to_a.sort
|
25
|
+
if block_given?
|
26
|
+
sorted_arg.each { |o| @hash[yield(o)] = true }
|
27
|
+
else
|
28
|
+
sorted_arg.each { |o| @hash[o] = true }
|
29
|
+
end
|
30
|
+
@max = sorted_arg.last
|
31
|
+
elsif !arg.nil?
|
32
|
+
raise ArgumentError, 'value must be enumerable'
|
33
|
+
end
|
34
|
+
|
35
|
+
@hash.freeze
|
36
|
+
end
|
37
|
+
|
38
|
+
def min
|
39
|
+
@min ||= (first_key, = @hash.first) && first_key
|
40
|
+
end
|
41
|
+
|
42
|
+
def minmax
|
43
|
+
[min, max]
|
44
|
+
end
|
45
|
+
|
46
|
+
def distinct_bounds?(other)
|
47
|
+
raise ArgumentError, 'pass an ImmutableSet' unless other.is_a?(ImmutableSet)
|
48
|
+
empty? || other.empty? || (min > other.max || max < other.min)
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#
|
2
|
+
# Builder methods that set @hash and @max.
|
3
|
+
#
|
4
|
+
class ImmutableSet < Set
  class << self
    # Returns an ImmutableSet holding the members of all passed Ranges.
    #
    # Ranges are processed in ascending order of their minimum, so members
    # end up ordered no matter in which order the Ranges were passed.
    def from_ranges(*ranges)
      build_with_hash_and_max do |members|
        Array(ranges).sort_by(&:min).reduce(nil) do |highest_max, range|
          feed_range_to_hash(range, members)
          range_max = range.max
          # The value of the last iteration becomes the max of the new set.
          [highest_max || range_max, range_max].max
        end
      end
    end

    # Returns an ImmutableSet.
    #
    # Accepts a ready-made Hash and its max value directly. The Hash (or a
    # fresh one if none was passed) is attached to the new set first and then
    # yielded to the block, if any, so it can be filled while attached.
    # When a block is given, its return value is used as the new max.
    #
    # The caller is responsible for passing the *correct* max of the new Set.
    # The Hash is frozen before this method returns.
    def build_with_hash_and_max(hash = nil, max = nil)
      hash ||= Hash.new(false)

      new.tap do |set|
        set.instance_variable_set(:@hash, hash)

        max = yield(hash) if block_given?
        raise ArgumentError, 'pass a comparable max' unless max.respond_to?(:<=>)

        hash.freeze
        set.instance_variable_set(:@max, max)
      end
    end

    # Returns obj itself if it already is an ImmutableSet, otherwise a new
    # ImmutableSet built from obj. Used to prepare operands for comparisons.
    def cast(obj)
      return obj if obj.is_a?(ImmutableSet)

      new(obj)
    end

    private

    # Inserts every member of range into hash. The native extension is used
    # when it is available and both range bounds have odd object ids;
    # otherwise the range is iterated in Ruby.
    def feed_range_to_hash(range, hash)
      use_native = native_ext && range.begin.object_id.odd? && range.end.object_id.odd?
      return native_ext.fill_with_fixnums(hash, range) if use_native

      range.each { |member| hash[member] = true }
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class ImmutableSet < Set
  # Names of all inherited methods that would mutate (or copy) the set:
  # a fixed list plus every instance method starting with "add" or "delete"
  # or ending in "!". Frozen, because #method_missing below relies on this
  # list to decide which error to raise.
  DISABLED_METHODS = %i[<< clear clone dup keep_if merge replace reset subtract]
                     .concat(instance_methods.grep(/^add|^delete|.!$/))
                     .freeze

  # Remove those of the listed methods that actually exist on this class.
  (DISABLED_METHODS & instance_methods).each { |name| undef_method(name) }

  # Raises a NoMethodError with an explanatory message when one of the
  # disabled methods is called; any other unknown method is handled by the
  # default implementation. No method is ever *handled* here, so there is
  # nothing to report via #respond_to_missing?.
  def method_missing(method_name, *args, &block)
    super unless DISABLED_METHODS.include?(method_name)
    raise NoMethodError, "##{method_name} can't be called on an ImmutableSet, "\
                         'only on a Set/SortedSet. Use #+, #-, #^, #& instead.'
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
class ImmutableSet < Set
  # Returns an ImmutableSet.
  #
  # The result holds every member of `from`..`upto` that is not in self.
  # With `ucp_only: true`, invalid unicode codepoints are left out.
  #
  # The native extension does the work if it is available and both bounds
  # have odd object ids; in all other cases RubyFallback is used.
  def inversion(from: nil, upto: nil, ucp_only: false)
    range = from..upto
    use_native = native_ext && from.object_id.odd? && upto.object_id.odd?
    return RubyFallback.inversion(self, range, ucp_only) unless use_native

    native_ext.invert_fixnum_set(self, range, ucp_only)
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class ImmutableSet < Set
  # Try to load the compiled extension; a LoadError simply means that it is
  # not available.
  ext_loaded =
    begin
      require_relative './immutable_set'
      Kernel.const_defined?(:ImmutableSetExt)
    rescue LoadError
      false
    end

  # ImmutableSet.native_ext returns the extension module if it could be
  # loaded, and nil otherwise.
  if ext_loaded
    def self.native_ext
      ::ImmutableSetExt
    end
  else
    def self.native_ext
      nil
    end
  end

  # Instance-level shortcut to ImmutableSet.native_ext.
  def native_ext
    self.class.native_ext
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
class ImmutableSet < Set
  # Pure Ruby implementations of the set relations.
  #
  # All methods expect ImmutableSets whose @hash holds the members in
  # ascending order, and all methods that build a set return one whose @hash
  # is ordered and whose max is an actual member (or nil if it is empty).
  module RubyFallback
    module_function

    # Returns a set of all members from range.begin up to and including
    # range.end that are not in set. If ucp_only is true, the values
    # 0xD800..0xDFFF are omitted.
    def inversion(set, range, ucp_only)
      from = range.begin
      upto = range.end

      set.class.build_with_hash_and_max do |new_hash|
        own_min, own_max = set.minmax
        new_max = nil

        # Inserts o and tracks it as the highest member inserted so far.
        insertion_proc = ->(o) do
          return if ucp_only && o >= 0xD800 && o <= 0xDFFF
          new_hash[o] = true
          new_max = o
        end

        if own_max.nil?
          # empty Set - inversion is pretty much equal to Set[from..upto]
          from.upto(upto) { |o| insertion_proc.call(o) }
          next new_max
        end

        own_hash = set.instance_variable_get(:@hash)
        o = from

        # insert all below own lower boundary without check
        while o < own_min && o <= upto
          insertion_proc.call(o)
          o = o.next
        end

        # insert with check within bounds
        while o <= own_max && o <= upto
          insertion_proc.call(o) unless own_hash.key?(o)
          o = o.next
        end

        # insert all above own upper boundary without check
        while o <= upto
          insertion_proc.call(o)
          o = o.next
        end

        new_max
      end
    end

    # Returns a set with the members of both set_a and set_b.
    def union(set_a, set_b)
      a_min, a_max = set_a.minmax
      b_min, b_max = set_b.minmax
      a_hash = set_a.instance_variable_get(:@hash)
      b_hash = set_b.instance_variable_get(:@hash)

      # An empty operand has nil bounds, which must not be compared below.
      # The result then simply shares the (frozen) hash of the other operand.
      return set_a.class.build_with_hash_and_max(b_hash, b_max) if a_hash.empty?
      return set_a.class.build_with_hash_and_max(a_hash, a_max) if b_hash.empty?

      # disjoint sets case (self wholly below b)
      if a_max < b_min
        hash = a_hash.dup.update(b_hash)
        return set_a.class.build_with_hash_and_max(hash, b_max)
      # disjoint sets case (b wholly below self)
      elsif b_max < a_min
        hash = b_hash.dup.update(a_hash)
        return set_a.class.build_with_hash_and_max(hash, a_max)
      end

      # sets with overlapping bounds case - insert objects in order
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = a_hash.keys
        b_keys = b_hash.keys
        a_key = a_keys[i = 0]
        b_key = b_keys[j = 0]
        while a_key && b_key
          if a_key < b_key
            new_hash[a_key] = true
            a_key = a_keys[i += 1]
          else
            # Equal keys end up here, too; re-inserting the same key from
            # a_keys in a later iteration does not change the hash.
            new_hash[b_key] = true
            b_key = b_keys[j += 1]
          end
        end

        # One side is exhausted; append whatever is left of the other side.
        remaining_keys, offset = a_key ? [a_keys, i] : [b_keys, j]
        remaining_size = remaining_keys.size
        while offset < remaining_size
          new_hash[remaining_keys[offset]] = true
          offset += 1
        end
        [a_max, b_max].max
      end
    end

    # Returns a set with all members of set_a that are not in set_b.
    def difference(set_a, set_b)
      # #dup of the frozen member hash yields a modifiable copy.
      new_hash = set_a.instance_variable_get(:@hash).dup
      set_b.instance_variable_get(:@hash).each_key { |o| new_hash.delete(o) }
      set_a.class.build_with_hash_and_max(new_hash, new_hash.keys.last)
    end

    # Returns a set with all members that are in both set_a and set_b.
    def intersection(set_a, set_b)
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = set_a.instance_variable_get(:@hash).keys
        a_max = set_a.max

        b_keys = set_b.instance_variable_get(:@hash).keys
        b_max = set_b.max

        a_key = a_keys[i = 0]
        b_key = b_keys[j = 0]

        # Keys are visited in ascending order, so the last shared key is the
        # max of the result. (The smaller of both maxes is not necessarily a
        # shared member and must not be used as max.)
        new_max = nil

        while a_key && b_key && a_key <= b_max && b_key <= a_max
          if a_key == b_key
            new_hash[new_max = a_key] = true
            a_key = a_keys[i += 1]
            b_key = b_keys[j += 1]
          elsif a_key < b_key
            a_key = a_keys[i += 1]
          else # a_key > b_key
            b_key = b_keys[j += 1]
          end
        end

        new_max
      end
    end

    # Returns true if set_a and set_b share at least one member.
    # The smaller set is iterated and checked against the larger one.
    def intersect?(set_a, set_b)
      cmp = ->(smaller_set, larger_set) do
        return false if smaller_set.distinct_bounds?(larger_set)

        larger_set_min, larger_set_max = larger_set.minmax
        smaller_set.any? do |smaller_set_obj|
          # not yet within the bounds of the larger set
          next if smaller_set_obj < larger_set_min
          # beyond the bounds of the larger set - no later member can match
          return false if smaller_set_obj > larger_set_max
          larger_set.include?(smaller_set_obj)
        end
      end
      set_a.size < set_b.size ? cmp.call(set_a, set_b) : cmp.call(set_b, set_a)
    end

    # Returns a set with all members that are in exactly one of set_a and
    # set_b. Both key lists are merged in ascending order, so that the result
    # is ordered and its max is the highest member of the result.
    def exclusion(set_a, set_b)
      a_keys = set_a.instance_variable_get(:@hash).keys
      b_keys = set_b.instance_variable_get(:@hash).keys

      set_a.class.build_with_hash_and_max do |new_hash|
        new_max = nil
        a_key = a_keys[i = 0]
        b_key = b_keys[j = 0]

        while a_key && b_key
          if a_key == b_key
            # shared member - belongs to neither side of the result
            a_key = a_keys[i += 1]
            b_key = b_keys[j += 1]
          elsif a_key < b_key
            new_hash[new_max = a_key] = true
            a_key = a_keys[i += 1]
          else # a_key > b_key
            new_hash[new_max = b_key] = true
            b_key = b_keys[j += 1]
          end
        end

        # One side is exhausted; everything left on the other side is unique.
        remaining_keys, offset = a_key ? [a_keys, i] : [b_keys, j]
        remaining_size = remaining_keys.size
        while offset < remaining_size
          new_hash[new_max = remaining_keys[offset]] = true
          offset += 1
        end

        new_max
      end
    end
  end
end
|
@@ -0,0 +1,155 @@
|
|
1
|
+
class ImmutableSet < Set
  #
  # These comparison methods only offer a big speed gain with the C extension,
  # or on Ruby < 2.3 where `Set` has no access to Hash#<=>.
  #
  # In Ruby, bad Enumerator#next performance makes using two of them in parallel
  # slower than just looking up everything (as #super does) for many cases.
  #

  # Returns true if self contains every member of set.
  def superset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_superset_of?(set) && native_ext.superset?(self, set)
  end
  alias >= superset?

  # Returns true if self contains every member of set and the bounds differ.
  def proper_superset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_proper_superset_of?(set) && native_ext.superset?(self, set)
  end
  alias > proper_superset?

  # Returns true if set contains every member of self.
  def subset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_subset_of?(set) && native_ext.subset?(self, set)
  end
  alias <= subset?

  # Returns true if set contains every member of self and the bounds differ.
  def proper_subset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_proper_subset_of?(set) && native_ext.subset?(self, set)
  end
  alias < proper_subset?

  #
  # These methods are faster both with the C extension and the Ruby fallback.
  #

  # Returns an ImmutableSet with the members of self and other.
  # Raises ArgumentError unless other responds to #each.
  def |(other)
    raise_unless_enumerable(other)
    return self if other.empty?

    other = self.class.cast(other)
    # An empty receiver has no bounds (min/max are nil) that the union
    # implementations could compare; the result is simply the other set.
    return other if empty?

    relate_with_method(:union, to_other: other)
  end
  alias + |
  alias union |

  # Returns an ImmutableSet with the members of self that are not in other.
  # Raises ArgumentError unless other responds to #each.
  def -(other)
    raise_unless_enumerable(other)
    return self if other.empty?

    other = self.class.cast(other)
    return self if distinct_bounds?(other)

    relate_with_method(:difference, to_other: other)
  end
  alias difference -

  # Returns an ImmutableSet with the members that are in self and in other.
  # Raises ArgumentError unless other responds to #each.
  def &(other)
    raise_unless_enumerable(other)
    return self.class.new if other.empty?

    other = self.class.cast(other)
    return self.class.new if distinct_bounds?(other)

    relate_with_method(:intersection, to_other: other)
  end
  alias intersection &

  # Returns an ImmutableSet with the members that are in exactly one of self
  # and other. Raises ArgumentError unless other responds to #each.
  def ^(other)
    raise_unless_enumerable(other)
    # Cast before returning, so the result is an ImmutableSet even if other
    # is e.g. an Array.
    return self.class.cast(other) if empty?
    return self if other.empty?

    other = self.class.cast(other)
    return self + other if distinct_bounds?(other)

    relate_with_method(:exclusion, to_other: other)
  end

  # Set#intersect? at ~ O(m*n) *can* surpass ImmutableSet#intersect? at ~ O(m+n)
  # for sets with *very* different sizes and unfortunately offset members.
  # Example: Set[999_999].intersect?(Set.new(1..1_000_000))
  STD_INTERSECT_THRESHOLD_RATIO = 1000

  # Returns true if self and other share at least one member.
  # Raises ArgumentError unless other responds to #each.
  def intersect?(other)
    raise_unless_enumerable(other)
    return false if empty? || other.empty?

    other = self.class.cast(other)
    return false if distinct_bounds?(other)

    # Both sizes are non-zero here, as empty sets were handled above.
    smaller_size, larger_size = [size, other.size].minmax
    return super if larger_size / smaller_size > STD_INTERSECT_THRESHOLD_RATIO

    relate_with_method(:intersect?, to_other: other)
  end

  # Returns a Hash that maps each value returned by the block to an
  # ImmutableSet of the members for which the block returned that value.
  # Without a block, the call is passed on to super.
  def classify
    return super unless block_given?

    classification_hash = {}
    each do |o|
      tmp = (classification_hash[yield(o)] ||= { data: {}, max: nil })
      tmp[:data][o] = true
      # the member assigned last is used as the max of its class
      tmp[:max] = o
    end
    classification_hash.map do |k, v|
      [k, self.class.build_with_hash_and_max(v[:data], v[:max])]
    end.to_h
  end

  #
  # The following private helper methods do not exist in the stdlib.
  #
  private

  # Raises ArgumentError unless obj responds to #each.
  def raise_unless_enumerable(obj)
    raise ArgumentError, 'value must be enumerable' unless obj.respond_to? :each
  end

  # Calls the given relation method on the module chosen by #relate_module,
  # passing self and to_other as operands.
  def relate_with_method(method, to_other: nil)
    relate_module(to_other).__send__(method, self, to_other)
  end

  # Returns the native extension if it can relate self to other, else
  # RubyFallback.
  def relate_module(other)
    native_ext_can_relate?(other) ? native_ext : RubyFallback
  end

  # The C extension can relate two sets if it is loaded, the other set is also
  # an ImmutableSet, neither is empty, and members are comparable between sets.
  def native_ext_can_relate?(other)
    native_ext && other.is_a?(ImmutableSet) && max && (max <=> other.max)
  end

  #
  # These are some very fast sanity checks that can improve clear-cut cases.
  # e.g.: a set with shorter bounds (at any end) can never be a superset.
  # This brings huge improvements on Ruby < 2.3 (Rubies without Hash#<=>).
  #
  def potentially_subset_of?(other)
    min >= other.min && max <= other.max
  end

  def potentially_proper_subset_of?(other)
    potentially_subset_of?(other) && (min > other.min || max < other.max)
  end

  def potentially_superset_of?(other)
    min <= other.min && max >= other.max
  end

  def potentially_proper_superset_of?(other)
    potentially_superset_of?(other) && (min < other.min || max > other.max)
  end
end
|