immutable_set 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
# Make the gem's own lib/ directory loadable so the version constant resolves.
lib_dir = File.expand_path('lib', __dir__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'set'
require 'immutable_set/version'

Gem::Specification.new do |spec|
  # Identity
  spec.name = 'immutable_set'
  spec.version = ImmutableSet::VERSION
  spec.authors = ['Janosch Müller']
  spec.email = ['janosch84@gmail.com']

  # Description
  spec.summary = "A faster, immutable replacement for Ruby's Set"
  spec.homepage = 'https://github.com/janosch-x/immutable_set'
  spec.license = 'MIT'

  # Package every git-tracked file except benchmark and test directories.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(benchmarks|test|spec|features)/}) }
  spec.executables = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # The native extension is only registered on non-Java platforms.
  spec.extensions = %w[ext/immutable_set/extconf.rb] unless RUBY_PLATFORM =~ /java/i

  spec.required_ruby_version = '>= 2.0.0'

  # Development-only dependencies, declared in the original order.
  {
    'benchmark-ips' => '~> 2.7',
    'bundler' => '~> 1.16',
    'rake' => '~> 10.0',
    'rake-compiler' => '~> 1.0',
    'rspec' => '~> 3.0'
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end
end
@@ -0,0 +1,50 @@
1
+ require 'set'
2
+ require 'immutable_set/builder_methods'
3
+ require 'immutable_set/native_ext'
4
+ require 'immutable_set/disable_mutating_methods'
5
+ require 'immutable_set/inversion'
6
+ require 'immutable_set/pure'
7
+ require 'immutable_set/ruby_fallback'
8
+ require 'immutable_set/stdlib_set_method_overrides'
9
+ require 'immutable_set/version'
10
+
11
+ class ImmutableSet < Set
12
+ attr_reader :max
13
+
14
+ def initialize(arg = nil)
15
+ @hash = Hash.new(false)
16
+
17
+ if arg.is_a?(ImmutableSet)
18
+ @hash = arg.instance_variable_get(:@hash)
19
+ @max = arg.max
20
+ elsif arg.is_a?(Range)
21
+ self.class.send(:feed_range_to_hash, arg, @hash)
22
+ @max = arg.max
23
+ elsif arg.respond_to?(:to_a)
24
+ sorted_arg = arg.to_a.sort
25
+ if block_given?
26
+ sorted_arg.each { |o| @hash[yield(o)] = true }
27
+ else
28
+ sorted_arg.each { |o| @hash[o] = true }
29
+ end
30
+ @max = sorted_arg.last
31
+ elsif !arg.nil?
32
+ raise ArgumentError, 'value must be enumerable'
33
+ end
34
+
35
+ @hash.freeze
36
+ end
37
+
38
+ def min
39
+ @min ||= (first_key, = @hash.first) && first_key
40
+ end
41
+
42
+ def minmax
43
+ [min, max]
44
+ end
45
+
46
+ def distinct_bounds?(other)
47
+ raise ArgumentError, 'pass an ImmutableSet' unless other.is_a?(ImmutableSet)
48
+ empty? || other.empty? || (min > other.max || max < other.min)
49
+ end
50
+ end
@@ -0,0 +1,60 @@
1
#
# Factory methods that assign @hash and @max directly.
#
class ImmutableSet < Set
  class << self
    # Builds an ImmutableSet holding every member of the given Ranges.
    #
    # Ranges are processed in ascending order of their #min, so members end
    # up ordered regardless of the order in which the Ranges were passed.
    def from_ranges(*ranges)
      build_with_hash_and_max do |target|
        Array(ranges).sort_by(&:min).reduce(nil) do |top, range|
          feed_range_to_hash(range, target)
          [top || range.max, range.max].max
        end
      end
    end

    # Builds an ImmutableSet around a given Hash and max value.
    #
    # hash - Hash whose keys are the members (a new Hash if omitted).
    # max  - the largest member; ignored when a block is given.
    #
    # If a block is given it receives the Hash - which is already attached to
    # the new set - so it can be filled in place; the block's return value is
    # then used as the max.
    #
    # The caller is responsible for passing the *correct* max; it is not
    # verified against the Hash.
    #
    # Raises ArgumentError if the max does not respond to #<=>.
    def build_with_hash_and_max(hash = nil, max = nil)
      members = hash || Hash.new(false)
      instance = new
      instance.instance_variable_set(:@hash, members)

      max = yield(members) if block_given?
      unless max.respond_to?(:<=>)
        raise ArgumentError, 'pass a comparable max'
      end

      members.freeze
      instance.instance_variable_set(:@max, max)
      instance
    end

    # Returns `obj` unchanged if it already is an ImmutableSet, otherwise a
    # new set built from it. Used to normalize operands before comparisons.
    def cast(obj)
      return obj if obj.is_a?(ImmutableSet)

      new(obj)
    end

    private

    # Inserts every member of `range` as a key of `hash`.
    #
    # The native extension is used when it is available and the object_ids of
    # both range bounds are odd; otherwise the range is iterated in Ruby.
    # NOTE(review): the odd-object_id test is presumably a Fixnum check, going
    # by the name of `fill_with_fixnums` - confirm for the targeted Rubies.
    def feed_range_to_hash(range, hash)
      unless native_ext && range.begin.object_id.odd? && range.end.object_id.odd?
        return range.each { |member| hash[member] = true }
      end

      native_ext.fill_with_fixnums(hash, range)
    end
  end
end
@@ -0,0 +1,12 @@
1
class ImmutableSet < Set
  # Names of the methods that are removed from ImmutableSet: a fixed list
  # plus every inherited instance method whose name starts with "add" or
  # "delete" or ends with "!".
  DISABLED_METHODS =
    %i[<< clear clone dup keep_if merge replace reset subtract] +
    instance_methods.grep(/^add|^delete|.!$/)

  # Only names that are actually defined on this Ruby can be undefined.
  DISABLED_METHODS.each do |name|
    undef_method(name) if method_defined?(name)
  end

  # Raises a NoMethodError with an explanatory message when one of the
  # disabled methods is called; every other unknown method is passed on to
  # the default handling.
  def method_missing(method_name, *args, &block)
    disabled = DISABLED_METHODS.include?(method_name)
    super unless disabled

    message = "##{method_name} can't be called on an ImmutableSet, "\
              'only on a Set/SortedSet. Use #+, #-, #^, #& instead.'
    raise NoMethodError, message
  end
end
@@ -0,0 +1,13 @@
1
class ImmutableSet < Set
  # Builds the inversion of this set within `from`..`upto`.
  #
  # from     - lower bound of the result (inclusive).
  # upto     - upper bound of the result (inclusive).
  # ucp_only - passed through to the implementation; when true, invalid
  #            unicode codepoints are left out of the result.
  #
  # The result contains all members of `from`..`upto` that are not in self.
  # The native extension does the work if it is available and the object_ids
  # of both bounds are odd; otherwise RubyFallback is used.
  def inversion(from: nil, upto: nil, ucp_only: false)
    ext = native_ext
    use_ext = ext && from.object_id.odd? && upto.object_id.odd?
    return RubyFallback.inversion(self, from..upto, ucp_only) unless use_ext

    ext.invert_fixnum_set(self, from..upto, ucp_only)
  end
end
@@ -0,0 +1,19 @@
1
class ImmutableSet < Set
  # Try to load the compiled extension that sits next to this file. It only
  # counts as available if loading succeeds *and* defines ImmutableSetExt.
  extension_loaded = false
  begin
    require_relative './immutable_set'
    extension_loaded = Kernel.const_defined?(:ImmutableSetExt)
  rescue LoadError
    extension_loaded = false
  end

  # ImmutableSet.native_ext returns the extension module, or nil when the
  # extension could not be loaded.
  unless extension_loaded
    def self.native_ext
      nil
    end
  else
    def self.native_ext
      ::ImmutableSetExt
    end
  end

  # Instance-level shortcut for the class-level lookup, so subclasses that
  # override the class method are respected.
  def native_ext
    self.class.native_ext
  end
end
@@ -0,0 +1,5 @@
1
class ImmutableSet < Set
  # ImmutableSet variant whose class-level native_ext always returns nil.
  class Pure < ImmutableSet
    class << self
      def native_ext
        nil
      end
    end
  end
end
@@ -0,0 +1,148 @@
1
class ImmutableSet < Set
  # Pure-Ruby implementations of the set relations.
  #
  # All functions read the members of their arguments from the @hash instance
  # variable, whose keys are expected to be in ascending order, and build
  # their result through `set.class.build_with_hash_and_max`. Every result is
  # therefore filled in ascending key order and given the max that matches
  # the keys actually inserted.
  module RubyFallback
    module_function

    # Returns a set of all members of `range` that are not in `set`.
    #
    # range    - only its #begin and #end are used; both are treated as
    #            inclusive bounds.
    # ucp_only - if true, values in 0xD800..0xDFFF are left out.
    def inversion(set, range, ucp_only)
      from = range.begin
      upto = range.end

      set.class.build_with_hash_and_max do |new_hash|
        own_min, own_max = set.minmax
        new_max = nil

        # A lambda, so that `return` merely skips the current value.
        insertion_proc = ->(o) do
          return if ucp_only && o >= 0xD800 && o <= 0xDFFF
          new_hash[o] = true
          new_max = o
        end

        if own_max.nil?
          # empty Set - inversion is pretty much equal to Set[from..upto]
          from.upto(upto) { |o| insertion_proc.call(o) }
          next new_max
        end

        own_hash = set.instance_variable_get(:@hash)
        o = from

        # insert all below own lower boundary without check
        while o < own_min && o <= upto
          insertion_proc.call(o)
          o = o.next
        end

        # insert with check within bounds
        while o <= own_max && o <= upto
          insertion_proc.call(o) unless own_hash.key?(o)
          o = o.next
        end

        # insert all above own upper boundary without check
        while o <= upto
          insertion_proc.call(o)
          o = o.next
        end

        new_max
      end
    end

    # Returns a set with the members of both `set_a` and `set_b`.
    # The result is an instance of set_a's class, except that `set_a` itself
    # is returned when `set_b` is empty.
    def union(set_a, set_b)
      a_min, a_max = set_a.minmax
      b_min, b_max = set_b.minmax
      a_hash = set_a.instance_variable_get(:@hash)
      b_hash = set_b.instance_variable_get(:@hash)

      # Empty operands have nil bounds, which cannot be compared below.
      return set_a.class.build_with_hash_and_max(b_hash, b_max) if a_hash.empty?
      return set_a if b_hash.empty?

      # disjoint sets case (self wholly below b)
      if a_max < b_min
        hash = a_hash.dup.update(b_hash)
        return set_a.class.build_with_hash_and_max(hash, b_max)
      # disjoint sets case (b wholly below self)
      elsif b_max < a_min
        hash = b_hash.dup.update(a_hash)
        return set_a.class.build_with_hash_and_max(hash, a_max)
      end

      # sets with overlapping bounds case - insert objects in order
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = a_hash.keys
        b_keys = b_hash.keys
        a_key = a_keys[i = 0]
        b_key = b_keys[j = 0]
        while a_key && b_key
          if a_key < b_key
            new_hash[a_key] = true
            a_key = a_keys[i += 1]
          else
            # Also taken for equal keys; the duplicate from a is then
            # re-assigned on a later iteration without changing key order.
            new_hash[b_key] = true
            b_key = b_keys[j += 1]
          end
        end

        # One list is exhausted; append what is left of the other.
        remaining_keys, offset = a_key ? [a_keys, i] : [b_keys, j]
        remaining_size = remaining_keys.size
        while offset < remaining_size
          new_hash[remaining_keys[offset]] = true
          offset += 1
        end
        [a_max, b_max].max
      end
    end

    # Returns a set with the members of `set_a` that are not in `set_b`.
    def difference(set_a, set_b)
      new_hash = set_a.instance_variable_get(:@hash).dup
      set_b.each { |o| new_hash.delete(o) }
      # Deleting keeps the remaining keys in order, so the last key is the max.
      set_a.class.build_with_hash_and_max(new_hash, new_hash.keys.last)
    end

    # Returns a set with the members that `set_a` and `set_b` have in common.
    def intersection(set_a, set_b)
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = set_a.instance_variable_get(:@hash).keys
        a_max = set_a.max

        b_keys = set_b.instance_variable_get(:@hash).keys
        b_max = set_b.max

        a_key = a_keys[i = 0]
        b_key = b_keys[j = 0]

        # The max of the result is the last *shared* member. The smaller of
        # the two input maxes is only an upper bound and need not be shared.
        new_max = nil

        # Walk both sorted key lists in parallel; stop as soon as either
        # list has moved past the other set's upper bound.
        while a_key && b_key && a_key <= b_max && b_key <= a_max
          if a_key == b_key
            new_hash[new_max = a_key] = true
            a_key = a_keys[i += 1]
            b_key = b_keys[j += 1]
          elsif a_key < b_key
            a_key = a_keys[i += 1]
          else # a_key > b_key
            b_key = b_keys[j += 1]
          end
        end

        new_max
      end
    end

    # Returns true if `set_a` and `set_b` share at least one member.
    # Iterates the smaller set and looks its members up in the larger one.
    def intersect?(set_a, set_b)
      # A lambda, so that `return` inside the #any? block leaves only `cmp`.
      cmp = ->(smaller_set, larger_set) do
        return false if smaller_set.distinct_bounds?(larger_set)

        larger_set_min, larger_set_max = larger_set.minmax
        smaller_set.any? do |smaller_set_obj|
          next if smaller_set_obj < larger_set_min
          # Members are ordered, so nothing further can match.
          return false if smaller_set_obj > larger_set_max
          larger_set.include?(smaller_set_obj)
        end
      end
      set_a.size < set_b.size ? cmp.call(set_a, set_b) : cmp.call(set_b, set_a)
    end

    # Returns a set with the members that are in exactly one of `set_a` and
    # `set_b`.
    #
    # Both key lists are merged in ascending order so that the result keeps
    # the ordered-keys invariant and its max is really its largest member,
    # also when the bounds of the two sets overlap.
    def exclusion(set_a, set_b)
      set_a.class.build_with_hash_and_max do |new_hash|
        a_keys = set_a.instance_variable_get(:@hash).keys
        b_keys = set_b.instance_variable_get(:@hash).keys
        a_size = a_keys.size
        b_size = b_keys.size
        i = j = 0
        new_max = nil

        while i < a_size && j < b_size
          a_key = a_keys[i]
          b_key = b_keys[j]
          if a_key.eql?(b_key) # same notion of equality as Hash keys
            i += 1
            j += 1
          elsif a_key < b_key
            new_hash[new_max = a_key] = true
            i += 1
          else
            new_hash[new_max = b_key] = true
            j += 1
          end
        end

        # At most one of the lists still has members; they are all exclusive.
        while i < a_size
          new_hash[new_max = a_keys[i]] = true
          i += 1
        end
        while j < b_size
          new_hash[new_max = b_keys[j]] = true
          j += 1
        end

        new_max
      end
    end
  end
end
@@ -0,0 +1,155 @@
1
class ImmutableSet < Set
  #
  # These comparison methods only offer a big speed gain with the C extension,
  # or on Ruby < 2.3 where `Set` has no access to Hash#<=>.
  #
  # In Ruby, bad Enumerator#next performance makes using two of them in parallel
  # slower than just looking up everything (as #super does) for many cases.
  #
  # Each of them defers to the stdlib implementation unless the native
  # extension can relate the two sets (see #native_ext_can_relate?).
  #
  def superset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_superset_of?(set) && native_ext.superset?(self, set)
  end
  alias >= superset?

  def proper_superset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_proper_superset_of?(set) && native_ext.superset?(self, set)
  end
  alias > proper_superset?

  def subset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_subset_of?(set) && native_ext.subset?(self, set)
  end
  alias <= subset?

  def proper_subset?(set)
    return super unless native_ext_can_relate?(set)
    potentially_proper_subset_of?(set) && native_ext.subset?(self, set)
  end
  alias < proper_subset?

  #
  # These methods are faster both with the C extension and the Ruby fallback.
  #
  # They accept any object that responds to #each. The argument is cast to an
  # ImmutableSet *before* it is asked whether it is empty, because plain
  # enumerables such as Range do not respond to #empty?.
  #

  # Returns an ImmutableSet with the members of self and `other`.
  # Raises ArgumentError if `other` does not respond to #each.
  def |(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return self if other.empty?
    # An empty receiver has no max, which the relating code cannot compare.
    return other if empty?

    relate_with_method(:union, to_other: other)
  end
  alias + |
  alias union |

  # Returns an ImmutableSet with the members of self that are not in `other`.
  # Raises ArgumentError if `other` does not respond to #each.
  def -(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return self if other.empty?
    return self if distinct_bounds?(other)

    relate_with_method(:difference, to_other: other)
  end
  alias difference -

  # Returns an ImmutableSet with the members shared by self and `other`.
  # Raises ArgumentError if `other` does not respond to #each.
  def &(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return self.class.new if other.empty?
    return self.class.new if distinct_bounds?(other)

    relate_with_method(:intersection, to_other: other)
  end
  alias intersection &

  # Returns an ImmutableSet with the members that are in exactly one of self
  # and `other`.
  # Raises ArgumentError if `other` does not respond to #each.
  def ^(other)
    raise_unless_enumerable(other)
    # Cast first, so that an ImmutableSet is returned on every path.
    other = self.class.cast(other)
    return other if empty?
    return self if other.empty?
    return self + other if distinct_bounds?(other)

    relate_with_method(:exclusion, to_other: other)
  end

  # Set#intersect? at ~ O(m*n) *can* surpass ImmutableSet#intersect? at ~ O(m+n)
  # for sets with *very* different sizes and unfortunately offset members.
  # Example: Set[999_999].intersect?(Set.new(1..1_000_000))
  STD_INTERSECT_THRESHOLD_RATIO = 1000

  # Returns true if self and `other` share at least one member.
  # Raises ArgumentError if `other` does not respond to #each.
  def intersect?(other)
    raise_unless_enumerable(other)
    other = self.class.cast(other)
    return false if empty? || other.empty?
    return false if distinct_bounds?(other)

    # Neither set is empty here, so the divisor is never zero.
    smaller_size, larger_size = [size, other.size].minmax
    return super if larger_size / smaller_size > STD_INTERSECT_THRESHOLD_RATIO

    relate_with_method(:intersect?, to_other: other)
  end

  # Groups the members by the return value of the block.
  # Returns a Hash that maps each block result to an ImmutableSet of the
  # members that produced it. Without a block, defers to Set#classify.
  def classify
    return super unless block_given?

    classification_hash = {}
    each do |o|
      tmp = (classification_hash[yield(o)] ||= { data: {}, max: nil })
      tmp[:data][o] = true
      # Members are visited in order, so the last one seen is the group's max.
      tmp[:max] = o
    end
    # Built without Array#to_h, which is only available from Ruby 2.1.
    classification_hash.each_with_object({}) do |(k, v), result|
      result[k] = self.class.build_with_hash_and_max(v[:data], v[:max])
    end
  end

  #
  # The following private helper methods do not exist in the stdlib.
  #
  private

  def raise_unless_enumerable(obj)
    raise ArgumentError, 'value must be enumerable' unless obj.respond_to? :each
  end

  # Calls `method` with (self, to_other) on the native extension if it can
  # relate the two sets, and on RubyFallback otherwise.
  def relate_with_method(method, to_other: nil)
    relate_module(to_other).__send__(method, self, to_other)
  end

  def relate_module(other)
    native_ext_can_relate?(other) ? native_ext : RubyFallback
  end

  # The C extension can relate two sets if it is loaded, the other set is also
  # an ImmutableSet, neither is empty, and members are comparable between sets.
  def native_ext_can_relate?(other)
    native_ext && other.is_a?(ImmutableSet) && max && (max <=> other.max)
  end

  #
  # These are some very fast sanity checks that can improve clear-cut cases.
  # e.g.: a set with shorter bounds (at any end) can never be a superset.
  # This brings huge improvements on Ruby < 2.3 (Rubies without Hash#<=>).
  #
  # The "proper" variants compare sizes rather than bounds: a proper
  # subset/superset may share both bounds with the other set (e.g. {1, 3} and
  # {1, 2, 3}), but it always differs in size.
  #
  def potentially_subset_of?(other)
    min >= other.min && max <= other.max
  end

  def potentially_proper_subset_of?(other)
    size < other.size && potentially_subset_of?(other)
  end

  def potentially_superset_of?(other)
    min <= other.min && max >= other.max
  end

  def potentially_proper_superset_of?(other)
    size > other.size && potentially_superset_of?(other)
  end
end