immutable_set 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,33 @@
1
# Gem specification for immutable_set.
#
# Puts the gem's own lib directory on the load path so that the version
# constant can be required before the gem is installed.
lib_dir = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'set'
require 'immutable_set/version'

Gem::Specification.new do |spec|
  spec.name    = 'immutable_set'
  spec.version = ImmutableSet::VERSION
  spec.authors = ['Janosch Müller']
  spec.email   = ['janosch84@gmail.com']

  spec.summary  = "A faster, immutable replacement for Ruby's Set"
  spec.homepage = 'https://github.com/janosch-x/immutable_set'
  spec.license  = 'MIT'

  # Package every git-tracked file except benchmarks and test suites.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject do |path|
    path.match(%r{^(benchmarks|test|spec|features)/})
  end
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # The extension is only registered when the platform name does not
  # contain "java".
  unless RUBY_PLATFORM =~ /java/i
    spec.extensions = %w[ext/immutable_set/extconf.rb]
  end

  spec.required_ruby_version = '>= 2.0.0'

  spec.add_development_dependency 'benchmark-ips', '~> 2.7'
  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'rake-compiler', '~> 1.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
@@ -0,0 +1,50 @@
1
+ require 'set'
2
+ require 'immutable_set/builder_methods'
3
+ require 'immutable_set/native_ext'
4
+ require 'immutable_set/disable_mutating_methods'
5
+ require 'immutable_set/inversion'
6
+ require 'immutable_set/pure'
7
+ require 'immutable_set/ruby_fallback'
8
+ require 'immutable_set/stdlib_set_method_overrides'
9
+ require 'immutable_set/version'
10
+
11
+ class ImmutableSet < Set
12
+ attr_reader :max
13
+
14
+ def initialize(arg = nil)
15
+ @hash = Hash.new(false)
16
+
17
+ if arg.is_a?(ImmutableSet)
18
+ @hash = arg.instance_variable_get(:@hash)
19
+ @max = arg.max
20
+ elsif arg.is_a?(Range)
21
+ self.class.send(:feed_range_to_hash, arg, @hash)
22
+ @max = arg.max
23
+ elsif arg.respond_to?(:to_a)
24
+ sorted_arg = arg.to_a.sort
25
+ if block_given?
26
+ sorted_arg.each { |o| @hash[yield(o)] = true }
27
+ else
28
+ sorted_arg.each { |o| @hash[o] = true }
29
+ end
30
+ @max = sorted_arg.last
31
+ elsif !arg.nil?
32
+ raise ArgumentError, 'value must be enumerable'
33
+ end
34
+
35
+ @hash.freeze
36
+ end
37
+
38
+ def min
39
+ @min ||= (first_key, = @hash.first) && first_key
40
+ end
41
+
42
+ def minmax
43
+ [min, max]
44
+ end
45
+
46
+ def distinct_bounds?(other)
47
+ raise ArgumentError, 'pass an ImmutableSet' unless other.is_a?(ImmutableSet)
48
+ empty? || other.empty? || (min > other.max || max < other.min)
49
+ end
50
+ end
@@ -0,0 +1,60 @@
1
+ #
2
+ # Builder methods that set @hash and @max.
3
+ #
4
class ImmutableSet < Set
  class << self
    # Returns an ImmutableSet containing all members of the given Ranges.
    #
    # Ranges may be passed as separate arguments or as one Array of Ranges.
    # Members will be ordered, irrespective of the order of passed Ranges.
    # Empty Ranges (whose #min is nil) are skipped, because they contribute no
    # members and cannot be sorted alongside non-empty Ranges.
    def from_ranges(*ranges)
      ordered = ranges.flatten.reject { |range| range.min.nil? }.sort_by(&:min)

      build_with_hash_and_max do |new_hash|
        ordered.each { |range| feed_range_to_hash(range, new_hash) }
        ordered.map(&:max).max
      end
    end

    # Returns an ImmutableSet.
    #
    # This method can be directly passed a Hash and a max value.
    # It also yields the Hash (or a new Hash if none is given) to any
    # given block, to allow filling it while it is already attached to the
    # new set, which can offer performance benefits for large hashes.
    # If a block is given and no max is passed as parameter, the block must
    # return the new max. An explicitly passed max takes precedence over the
    # block's return value.
    #
    # Make sure to pass the *correct* max of the new Set, or things will break.
    #
    # Raises ArgumentError if the resulting max does not respond to #<=>.
    def build_with_hash_and_max(hash = nil, max = nil)
      hash ||= Hash.new(false)
      set = new
      set.instance_variable_set(:@hash, hash)

      if block_given?
        block_max = yield(hash)
        max = block_max if max.nil?
      end
      raise ArgumentError, 'pass a comparable max' unless max.respond_to?(:<=>)

      hash.freeze
      set.instance_variable_set(:@max, max)
      set
    end

    # Returns an ImmutableSet.
    #
    # Used to cast Enumerables to ImmutableSet if needed for comparisons.
    # An object that already is an ImmutableSet is returned as is.
    def cast(obj)
      obj.is_a?(ImmutableSet) ? obj : new(obj)
    end

    private

    # Adds every member of `range` as a key of `hash`.
    #
    # The native extension is used when it is available and both range bounds
    # have odd object ids.
    # NOTE(review): the odd-object_id test presumably detects Fixnums, as the
    # name `fill_with_fixnums` suggests - confirm against the extension.
    def feed_range_to_hash(range, hash)
      if native_ext && range.begin.object_id.odd? && range.end.object_id.odd?
        native_ext.fill_with_fixnums(hash, range)
      else
        range.each { |o| hash[o] = true }
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
class ImmutableSet < Set
  # Names of all mutating methods that are removed from ImmutableSet: a fixed
  # list plus every inherited method that starts with "add" or "delete" or
  # ends with "!". Frozen so the list itself cannot be changed at runtime.
  DISABLED_METHODS = (
    %i[<< clear clone dup keep_if merge replace reset subtract] +
    instance_methods.grep(/^add|^delete|.!$/)
  ).uniq.freeze

  # Only undefine the methods that actually exist on this Ruby version.
  (DISABLED_METHODS & instance_methods).each { |method| undef_method(method) }

  # Raises a NoMethodError with an explanatory message when one of the
  # disabled methods is called. Any other unknown method is handed to the
  # default implementation via super.
  def method_missing(method_name, *args, &block)
    super unless DISABLED_METHODS.include?(method_name)
    raise NoMethodError, "##{method_name} can't be called on an ImmutableSet, "\
                         'only on a Set/SortedSet. Use #+, #-, #^, #& instead.'
  end
end
@@ -0,0 +1,13 @@
1
class ImmutableSet < Set
  # Returns an ImmutableSet.
  #
  # The result includes all members `from`..`upto` that are not in self.
  # If `ucp_only` is true, invalid unicode codepoints are omitted.
  #
  # Both `from` and `upto` must be given; they only default to nil because
  # required keyword arguments are not available on all supported Rubies.
  #
  # Raises ArgumentError if `from` or `upto` is missing.
  def inversion(from: nil, upto: nil, ucp_only: false)
    if from.nil? || upto.nil?
      raise ArgumentError, 'pass both from: and upto:'
    end

    bounds = from..upto
    # The native extension is only used when both bounds have odd object ids.
    if native_ext && from.object_id.odd? && upto.object_id.odd?
      native_ext.invert_fixnum_set(self, bounds, ucp_only)
    else
      RubyFallback.inversion(self, bounds, ucp_only)
    end
  end
end
@@ -0,0 +1,19 @@
1
+ class ImmutableSet < Set
2
+ native_ext_available =
3
+ begin
4
+ require_relative './immutable_set'
5
+ Kernel.const_defined?(:ImmutableSetExt)
6
+ rescue LoadError
7
+ false
8
+ end
9
+
10
+ if native_ext_available
11
+ def self.native_ext; ::ImmutableSetExt end
12
+ else
13
+ def self.native_ext; end
14
+ end
15
+
16
+ def native_ext
17
+ self.class.native_ext
18
+ end
19
+ end
@@ -0,0 +1,5 @@
1
class ImmutableSet < Set
  # An ImmutableSet variant whose class-level native extension lookup always
  # returns nil.
  class Pure < ImmutableSet
    class << self
      def native_ext
        nil
      end
    end
  end
end
@@ -0,0 +1,148 @@
1
class ImmutableSet < Set
  # Pure-Ruby implementations of the set operations.
  #
  # All functions expect sets whose members are stored in ascending order and
  # are comparable with each other. Results are built with
  # `set_a.class.build_with_hash_and_max`, so every function must insert
  # members in ascending order and report the true maximum of the result.
  module RubyFallback
    module_function

    # Returns a set of all values from range.begin up to and including
    # range.end that are not members of `set`. If `ucp_only` is true, values
    # from 0xD800 to 0xDFFF are left out.
    def inversion(set, range, ucp_only)
      from = range.begin
      upto = range.end

      set.class.build_with_hash_and_max do |new_hash|
        own_min, own_max = set.minmax
        new_max = nil

        # Values are visited in ascending order, so the last inserted value
        # is always the max of the result.
        insert = lambda do |o|
          next if ucp_only && o >= 0xD800 && o <= 0xDFFF
          new_hash[o] = true
          new_max = o
        end

        if own_max.nil?
          # empty Set - inversion is pretty much equal to Set[from..upto]
          from.upto(upto) { |o| insert.call(o) }
          next new_max
        end

        own_hash = set.instance_variable_get(:@hash)
        o = from

        # insert all below own lower boundary without check
        while o < own_min && o <= upto
          insert.call(o)
          o = o.next
        end

        # insert with check within bounds
        while o <= own_max && o <= upto
          insert.call(o) unless own_hash.key?(o)
          o = o.next
        end

        # insert all above own upper boundary without check
        while o <= upto
          insert.call(o)
          o = o.next
        end

        new_max
      end
    end

    # Returns a set with all members of `set_a` and `set_b`.
    def union(set_a, set_b)
      a_min, a_max = set_a.minmax
      b_min, b_max = set_b.minmax
      a_hash = set_a.instance_variable_get(:@hash)
      b_hash = set_b.instance_variable_get(:@hash)

      # An empty operand has no bounds to compare against, so handle it before
      # the boundary comparisons below.
      return set_a.class.build_with_hash_and_max(b_hash, b_max) if a_hash.empty?
      return set_a if b_hash.empty?

      # disjoint sets case (self wholly below b)
      if a_max < b_min
        hash = a_hash.dup.update(b_hash)
        return set_a.class.build_with_hash_and_max(hash, b_max)
      # disjoint sets case (b wholly below self)
      elsif b_max < a_min
        hash = b_hash.dup.update(a_hash)
        return set_a.class.build_with_hash_and_max(hash, a_max)
      end

      # sets with overlapping bounds case - insert objects in order
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = a_hash.keys
        b_keys = b_hash.keys
        i = 0
        j = 0
        a_key = a_keys[i]
        b_key = b_keys[j]

        while a_key && b_key
          if a_key < b_key
            new_hash[a_key] = true
            a_key = a_keys[i += 1]
          else
            new_hash[b_key] = true
            b_key = b_keys[j += 1]
          end
        end

        # One list is exhausted; append whatever is left of the other one.
        remaining_keys, offset = a_key ? [a_keys, i] : [b_keys, j]
        remaining_keys.drop(offset).each { |key| new_hash[key] = true }

        [a_max, b_max].max
      end
    end

    # Returns a set with all members of `set_a` that are not in `set_b`.
    def difference(set_a, set_b)
      new_hash = set_a.instance_variable_get(:@hash).dup
      set_b.each { |o| new_hash.delete(o) }
      set_a.class.build_with_hash_and_max(new_hash, new_hash.keys.last)
    end

    # Returns a set with all members that are in both `set_a` and `set_b`.
    def intersection(set_a, set_b)
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = set_a.to_a
        a_max = set_a.max

        b_keys = set_b.to_a
        b_max = set_b.max

        a_key = a_keys[i = 0]
        b_key = b_keys[j = 0]

        # The max of the result is the largest *common* member, which is not
        # necessarily the smaller of the two maxima, so track it explicitly.
        new_max = nil

        while a_key && b_key && a_key <= b_max && b_key <= a_max
          if a_key == b_key
            new_hash[new_max = a_key] = true
            a_key = a_keys[i += 1]
            b_key = b_keys[j += 1]
          elsif a_key < b_key
            a_key = a_keys[i += 1]
          else # a_key > b_key
            b_key = b_keys[j += 1]
          end
        end

        new_max
      end
    end

    # Returns true if `set_a` and `set_b` have at least one common member.
    def intersect?(set_a, set_b)
      smaller_set, larger_set =
        set_a.size < set_b.size ? [set_a, set_b] : [set_b, set_a]
      return false if smaller_set.distinct_bounds?(larger_set)

      larger_set_min, larger_set_max = larger_set.minmax
      smaller_set.each do |smaller_set_obj|
        next if smaller_set_obj < larger_set_min
        # Members ascend, so nothing beyond this point can be in larger_set.
        break if smaller_set_obj > larger_set_max
        return true if larger_set.include?(smaller_set_obj)
      end
      false
    end

    # Returns a set with all members that are in exactly one of `set_a` and
    # `set_b`.
    def exclusion(set_a, set_b)
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = set_a.to_a
        b_keys = set_b.to_a
        a_size = a_keys.size
        b_size = b_keys.size
        i = 0
        j = 0
        new_max = nil

        # Walk both ordered key lists in parallel so that the result is filled
        # in ascending order and the last inserted member is the true max.
        while i < a_size && j < b_size
          a_key = a_keys[i]
          b_key = b_keys[j]
          if a_key == b_key
            i += 1
            j += 1
          elsif a_key < b_key
            new_hash[new_max = a_key] = true
            i += 1
          else
            new_hash[new_max = b_key] = true
            j += 1
          end
        end

        # At most one of these remainders is non-empty.
        a_keys.drop(i).each { |key| new_hash[new_max = key] = true }
        b_keys.drop(j).each { |key| new_hash[new_max = key] = true }

        new_max
      end
    end
  end
end
@@ -0,0 +1,155 @@
1
class ImmutableSet < Set
  #
  # These comparison methods only offer a big speed gain with the C extension,
  # or on Ruby < 2.3 where `Set` has no access to Hash#<=>.
  #
  # In Ruby, bad Enumerator#next performance makes using two of them in parallel
  # slower than just looking up everything (as #super does) for many cases.
  #
  def superset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_superset_of?(set) && native_ext.superset?(self, set)
  end
  alias >= superset?

  def proper_superset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_proper_superset_of?(set) && native_ext.superset?(self, set)
  end
  alias > proper_superset?

  def subset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_subset_of?(set) && native_ext.subset?(self, set)
  end
  alias <= subset?

  def proper_subset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_proper_subset_of?(set) && native_ext.subset?(self, set)
  end
  alias < proper_subset?

  #
  # These methods are faster both with the C extension and the Ruby fallback.
  #
  # In all of them, `other` is cast to an ImmutableSet *before* it is asked
  # whether it is empty, because not every enumerable (e.g. a Range) responds
  # to #empty?. Casting also guarantees that an ImmutableSet is returned.
  #

  # Returns an ImmutableSet with the members of self and `other`.
  # Raises ArgumentError unless `other` responds to #each.
  def |(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    # An empty receiver has no bounds to compare, so short-circuit here.
    return other if empty?
    return self if other.empty?

    relate_with_method(:union, to_other: other)
  end
  alias + |
  alias union |

  # Returns an ImmutableSet with the members of self that are not in `other`.
  # Raises ArgumentError unless `other` responds to #each.
  def -(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return self if other.empty? || distinct_bounds?(other)

    relate_with_method(:difference, to_other: other)
  end
  alias difference -

  # Returns an ImmutableSet with the members that are in self and `other`.
  # Raises ArgumentError unless `other` responds to #each.
  def &(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return self.class.new if other.empty? || distinct_bounds?(other)

    relate_with_method(:intersection, to_other: other)
  end
  alias intersection &

  # Returns an ImmutableSet with the members that are in exactly one of self
  # and `other`.
  # Raises ArgumentError unless `other` responds to #each.
  def ^(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return other if empty?
    return self if other.empty?
    return self + other if distinct_bounds?(other)

    relate_with_method(:exclusion, to_other: other)
  end

  # Set#intersect? at ~ O(m*n) *can* surpass ImmutableSet#intersect? at ~ O(m+n)
  # for sets with *very* different sizes and unfortunately offset members.
  # Example: Set[999_999].intersect?(Set.new(1..1_000_000))
  STD_INTERSECT_THRESHOLD_RATIO = 1000

  # Returns true if self and `other` have at least one common member.
  # Raises ArgumentError unless `other` responds to #each.
  def intersect?(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return false if empty? || other.empty? || distinct_bounds?(other)

    smaller_size, larger_size = [size, other.size].minmax
    return super if larger_size / smaller_size > STD_INTERSECT_THRESHOLD_RATIO

    relate_with_method(:intersect?, to_other: other)
  end

  # With a block, returns a Hash that maps each block result to an
  # ImmutableSet of the members that produced it. Without a block, defers to
  # the inherited implementation.
  def classify
    return super unless block_given?

    groups = {}
    each do |o|
      group = (groups[yield(o)] ||= { data: {}, max: nil })
      group[:data][o] = true
      # Members are visited in order, so the last one seen is the group max.
      group[:max] = o
    end
    groups.each_with_object({}) do |(key, group), result|
      result[key] = self.class.build_with_hash_and_max(group[:data], group[:max])
    end
  end

  #
  # The following private helper methods do not exist in the stdlib.
  #
  private

  def raise_unless_enumerable(obj)
    raise ArgumentError, 'value must be enumerable' unless obj.respond_to? :each
  end

  # Calls `method` on the native extension or the Ruby fallback, passing self
  # and `to_other`.
  def relate_with_method(method, to_other: nil)
    relate_module(to_other).__send__(method, self, to_other)
  end

  def relate_module(other)
    native_ext_can_relate?(other) ? native_ext : RubyFallback
  end

  # The C extension can relate two sets if it is loaded, the other set is also
  # an ImmutableSet, neither is empty, and members are comparable between sets.
  def native_ext_can_relate?(other)
    native_ext && other.is_a?(ImmutableSet) && max && (max <=> other.max)
  end

  #
  # These are some very fast sanity checks that can improve clear-cut cases.
  # e.g.: a set with shorter bounds (at any end) can never be a superset.
  # This brings huge improvements on Ruby < 2.3 (Rubies without Hash#<=>).
  #
  def potentially_subset_of?(other)
    min >= other.min && max <= other.max
  end

  # A proper subset must have fewer members. Comparing the bounds alone is
  # not enough: {1, 3} is a proper subset of {1, 2, 3} with identical bounds.
  def potentially_proper_subset_of?(other)
    potentially_subset_of?(other) && size < other.size
  end

  def potentially_superset_of?(other)
    min <= other.min && max >= other.max
  end

  # A proper superset must have more members; see the note on
  # #potentially_proper_subset_of?.
  def potentially_proper_superset_of?(other)
    potentially_superset_of?(other) && size > other.size
  end
end