immutable_set 0.1.0

@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 142c33ab1a414a70f1d310c511174e4ca1491c25b73062057595b93412f4be54
4
+ data.tar.gz: 8b2bf71c740c2c1273c33524a0d9b5465a3bbfdc373079dd70025ec4da4a3804
5
+ SHA512:
6
+ metadata.gz: a1f80fe0c093925ed2012facf812be01d065e164199adb4d03e15ae311ab3c34d0a705fc86d90b9d187b9f3519bf3076e65199e566ea6f7b9fdd6b7c0f647871
7
+ data.tar.gz: 59628f65aad7609e958b1f8fa444b5d855dc87a66d5801ef74b099bb34dd76bcf9ce3897bcead01e5f0d73c38abe7c10f37da5498487bb7d37abb66e100481f8
@@ -0,0 +1,31 @@
1
+ *.bundle
2
+ *.gem
3
+ *.iml
4
+ *.stTheme.cache
5
+ *.sublime-project
6
+ *.sublime-workspace
7
+ *.swp
8
+ *.tmlanguage.cache
9
+ *.tmPreferences.cache
10
+ *~
11
+ .byebug_history
12
+ .DS_Store
13
+ .idea/
14
+ .ruby-gemset
15
+ .ruby-version
16
+ .tags
17
+ .tags1
18
+ bbin/
19
+ binstubs/*
20
+ bundler_stubs/*/.yardoc
21
+ Gemfile.lock
22
+ /.bundle/
23
+ /_yardoc/
24
+ /coverage/
25
+ /doc/
26
+ /pkg/
27
+ /spec/reports/
28
+ /tmp/
29
+
30
+ # rspec failure tracking
31
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ before_install:
9
+ - gem update --system
10
+ - gem install bundler
@@ -0,0 +1,131 @@
1
+ Results of `rake benchmark` on ruby 2.5.1p57 (2018-03-29 revision 63029) [x86_64-darwin17]
2
+
3
+ Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. If the `rbtree` gem is present, `SortedSet` will [use it](https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724) and become even slower.
4
+
5
+ ```
6
+ #- with 5M overlapping items
7
+ gem: 6.6 i/s
8
+ gem w/o c: 0.8 i/s - 7.85x slower
9
+ stdlib: 0.7 i/s - 9.51x slower
+ ```
10
+ ```
11
+ #- with 5M distinct items
12
+ gem: 1429392.7 i/s
13
+ gem w/o c: 1414260.7 i/s - same-ish
14
+ stdlib: 1.0 i/s - 1456728.62x slower
+ ```
15
+ ```
16
+ #^ with 5M overlapping items
17
+ gem: 0.9 i/s
18
+ gem w/o C: 0.4 i/s - 2.12x slower
19
+ stdlib: 0.4 i/s - 2.16x slower
20
+ ```
21
+ ```
22
+ #^ with 5M distinct items
23
+ gem w/o C: 0.8 i/s
24
+ gem: 0.6 i/s - 1.25x slower
25
+ stdlib: 0.5 i/s - 1.65x slower
26
+ ```
27
+ ```
28
+ #intersect? with 5M intersecting items
29
+ gem: 266.8 i/s
30
+ gem w/o C: 8.2 i/s - 32.53x slower
31
+ stdlib: 2.2 i/s - 121.88x slower
32
+ ```
33
+ ```
34
+ #intersect? with 5M sparse items (rare case?)
35
+ gem w/o C: 1442.5 i/s
36
+ gem: 185.2 i/s - 7.79x slower
37
+ stdlib: 2.0 i/s - 712.75x slower
38
+ ```
39
+ ```
40
+ #intersect? with 5M distinct items
41
+ gem: 1376038.3 i/s
42
+ gem w/o C: 1375048.5 i/s - same-ish
43
+ stdlib: 2.0 i/s - 675307.67x slower
44
+ ```
45
+ ```
46
+ #& with 5M intersecting items
47
+ gem: 6.4 i/s
48
+ gem w/o C: 2.6 i/s - 2.49x slower
49
+ Array#&: 1.3 i/s - 4.83x slower
50
+ stdlib: 0.9 i/s - 6.90x slower
51
+ ```
52
+ ```
53
+ #& with 5M sparse items (rare case?)
54
+ gem: 88.3 i/s
55
+ gem w/o C: 19.6 i/s - 4.50x slower
56
+ stdlib: 2.0 i/s - 44.46x slower
57
+ Array#&: 1.8 i/s - 49.61x slower
58
+ ```
59
+ ```
60
+ #& with 5M distinct items
61
+ gem w/o C: 578891.9 i/s
62
+ gem: 571604.2 i/s - same-ish
63
+ stdlib: 2.1 i/s - 281016.75x slower
64
+ Array#&: 1.8 i/s - 316493.80x slower
65
+ ```
66
+ ```
67
+ #inversion with 5M items
68
+ gem: 1.8 i/s
69
+ gem w/o C: 0.7 i/s - 2.58x slower
70
+ stdlib #-: 0.3 i/s - 6.67x slower
71
+ ```
72
+ ```
73
+ #inversion with 100k items
74
+ gem: 239.5 i/s
75
+ gem w/o C: 62.8 i/s - 3.81x slower
76
+ stdlib #-: 29.2 i/s - 8.22x slower
77
+ ```
78
+ ```
79
+ #minmax with 10M items
80
+ gem: 3180102.2 i/s
81
+ gem w/o C: 3170355.3 i/s - same-ish
82
+ stdlib: 5.3 i/s - 595743.46x slower
83
+ ```
84
+ ```
85
+ #minmax with 1M items
86
+ gem: 3247178.7 i/s
87
+ gem w/o C: 3231669.0 i/s - same-ish
88
+ stdlib: 52.8 i/s - 61535.19x slower
89
+ ```
90
+ ```
91
+ ::new with 5M Range items
92
+ gem: 0.8 i/s
93
+ gem w/o C: 0.6 i/s - 1.27x slower
94
+ stdlib: 0.4 i/s - 1.78x slower
95
+ ```
96
+ ```
97
+ ::new with 100k Range items
98
+ gem: 126.7 i/s
99
+ gem w/o C: 69.2 i/s - 1.83x slower
100
+ stdlib: 33.1 i/s - 3.83x slower
101
+ ```
102
+ ```
103
+ ::new with 10k Range items in 10 non-continuous Ranges
104
+ gem: 3117.6 i/s
105
+ gem w/o C: 1326.2 i/s - 2.35x slower
106
+ stdlib: 666.7 i/s - 4.68x slower
107
+ ```
108
+ ```
109
+ #(proper_)subset/superset? with 5M subset items
110
+ gem: 50.8 i/s
111
+ gem w/o C: 1.4 i/s - 37.61x slower
112
+ stdlib: 1.3 i/s - 37.71x slower
113
+ ```
114
+ ```
115
+ #(proper_)subset/superset? with 5M overlapping items
116
+ gem: 51.0 i/s
117
+ gem w/o C: 1.4 i/s - 36.49x slower
118
+ stdlib: 1.4 i/s - 36.74x slower
119
+ ```
120
+ ```
121
+ #(proper_)subset/superset? with 100k overlapping items
122
+ gem: 3238.3 i/s
123
+ stdlib: 302.9 i/s - 10.69x slower
124
+ gem w/o C: 281.8 i/s - 11.49x slower
125
+ ```
126
+ ```
127
+ #+ with 5M overlapping items
128
+ gem: 1.4 i/s
129
+ stdlib: 1.2 i/s - 1.19x slower
130
+ gem w/o C: 0.9 i/s - 1.49x slower
131
+ ```
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in immutable_set.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Jannosch Müller
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,83 @@
1
+ [![Gem Version](https://badge.fury.io/rb/immutable_set.svg)](http://badge.fury.io/rb/immutable_set)
2
+ [![Build Status](https://travis-ci.org/janosch-x/immutable_set.svg?branch=master)](https://travis-ci.org/janosch-x/immutable_set)
3
+
4
+ # ImmutableSet
5
+
6
+ A faster, immutable replacement for Ruby's [`Set`](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html).
7
+
8
+ On Ruby >= 2.4, all operations are faster, some by several orders of magnitude (see [benchmarks](./BENCHMARK.md)).
9
+
10
+ #### Useful for ...
11
+
12
+ - creating and working with large sorted sets
13
+ - intersecting, merging, diffing, checking for subsets etc.
14
+ - the [advantages of immutability](https://hackernoon.com/f98e7e85b6ac)
15
+
16
+ #### Not useful for ...
17
+
18
+ - small sets and other cases where performance is negligible
19
+ - sets with mixed members or any members that are not mutually comparable
20
+ - doing a lot of adding, removing, and checking of single items
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ require 'immutable_set'
26
+
27
+ class MySet < ImmutableSet; end
28
+ ```
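
For orientation, here is a hypothetical snippet of how such a subclass might then be used. The outputs assume the behaviour documented in the sections below (`::from_ranges` is introduced under "New methods"), so treat this as a sketch rather than captured gem output:

```ruby
a = MySet.from_ranges(1..3, 5..6) # => MySet[1, 2, 3, 5, 6]
b = MySet[3, 4, 5]

a & b           # => MySet[3, 5]
a + b           # => MySet[1, 2, 3, 4, 5, 6]
a.intersect?(b) # => true
a.subset?(b)    # => false
```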
29
+
30
+ Mutating methods of `Set` (e.g. `#add`, `#delete`) are removed. They can be substituted like this if needed:
31
+
32
+ ```ruby
33
+ set1 = MySet[1, 2, 3]
34
+ set1 += MySet[4] # => MySet[1, 2, 3, 4]
35
+ set1 -= MySet[3] # => MySet[1, 2, 4]
36
+ ```
37
+
38
+ Immutability is required for most of the [performance optimizations](#performance-optimizations).
39
+
40
+ All other methods behave as in `Set`/`SortedSet`, so see the [official documentation](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html) for details about what they do.
41
+
42
+ ## New methods
43
+
44
+ **#distinct_bounds?**
45
+
46
+ Returns true iff the passed set lies entirely outside the `#minmax` boundaries of `self`.
47
+
48
+ ```ruby
49
+ MySet[2, 4].distinct_bounds?(MySet[3]) # => false
50
+ MySet[2, 4].distinct_bounds?(MySet[5]) # => true
51
+ ```
52
+
53
+ **::from_ranges**
54
+
55
+ Returns a set built from all passed `Ranges`.
56
+
57
+ ```ruby
58
+ MySet.from_ranges(2..4, 6..8) # => MySet[2, 3, 4, 6, 7, 8]
59
+ ```
60
+
61
+ **#inversion**
62
+
63
+ Returns a new set of the same class, containing every member of the range `from`..`upto` that is not in `self`. Faster than `Set.new(from..upto) - self`.
64
+
65
+ ```ruby
66
+ MySet[3, 5].inversion(from: 1, upto: 4) # => MySet[1, 2, 4]
67
+ MySet['c'].inversion(from: 'a', upto: 'd') # => MySet['a', 'b', 'd']
68
+ ```
69
+
70
+ ## Performance optimizations
71
+
72
+ The cost of many methods is reduced from O(m*n) to O(m+n) or better (a pure-Ruby sketch of the core idea follows this list). The underlying ideas are:
73
+
74
+ - never needing to sort, because the internal `@hash` is built in order and then frozen
75
+ - remembering `#max` cheaply whenever possible
76
+ - this allows skipping unneeded checks for members outside the own `#minmax` boundaries
77
+ - avoiding unneeded lookups during comparisons by iterating over both sets in parallel in C
78
+ - parallel iteration can skip over gaps in either set since both hashes are ordered
79
+ - when using Ruby, preferring plain `while` loops over slower, scope-building iteration methods
80
+
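
To make the parallel-iteration idea concrete, here is a minimal pure-Ruby sketch of intersecting two already-sorted, duplicate-free arrays in O(m+n). It illustrates the technique only; it is not the gem's implementation, which walks the frozen hashes' entries in the C extension shown further below:

```ruby
# Walk both sorted arrays in lockstep; each comparison advances at least one
# index, so the whole intersection costs O(m + n) comparisons.
def sorted_intersection(a, b)
  result = []
  i = j = 0
  while i < a.size && j < b.size
    cmp = a[i] <=> b[j]
    if cmp.zero?        # shared member: keep it, advance both sides
      result << a[i]
      i += 1
      j += 1
    elsif cmp.negative? # a[i] is smaller, it cannot appear later in b
      i += 1
    else                # b[j] is smaller, it cannot appear later in a
      j += 1
    end
  end
  result
end

sorted_intersection([1, 3, 5, 7], [2, 3, 4, 7]) # => [3, 7]
```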
81
+ ## Benchmarks
82
+
83
+ Run `rake benchmark` or see [BENCHMARK.md](./BENCHMARK.md).
@@ -0,0 +1,85 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
7
+
8
+ require 'rake/extensiontask'
9
+
10
+ Rake::ExtensionTask.new('immutable_set') do |ext|
11
+ ext.lib_dir = 'lib/immutable_set'
12
+ end
13
+
14
+ desc 'Download relevant ruby/spec tests, adapt to ImmutableSet and its variants'
15
+ task :sync_ruby_spec do
16
+ require 'fileutils'
17
+
18
+ variants = {
19
+ 'ImmutableSet' => './spec/ruby-spec/library/immutable_set',
20
+ 'ImmutableSet::Pure' => './spec/ruby-spec/library/immutable_set_pure',
21
+ }
22
+ variants.each_value { |dir| FileUtils.rm_rf(dir) if File.exist?(dir) }
23
+
24
+ base = variants.first[1]
25
+ `svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{base}`
26
+
27
+ variants.each_value { |dir| FileUtils.copy_entry(base, dir) unless dir == base }
28
+
29
+ variants.each.with_index do |(class_name, dir), i|
30
+ Dir["#{dir}/**/*.rb"].each do |spec|
31
+ if spec =~ %r{/(add|append|case|clear|collect|delete|filter|flatten|
32
+ initialize|keep_if|map|merge|replace|reject|select|subtract)}x
33
+ File.delete(spec)
34
+ next
35
+ end
36
+
37
+ # `i` must be added to shared example names or they'll override each other
38
+ adapted_content =
39
+ File
40
+ .read(spec)
41
+ .gsub('SortedSet', class_name)
42
+ .gsub('sorted_set_', "sorted_set_#{i}_")
43
+ .gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |method|')
44
+ .gsub('@method', 'method')
45
+ .gsub(/be_(false|true)/, 'be \1')
46
+ .gsub('mock', 'double')
47
+
48
+ File.open(spec, 'w') { |f| f.puts adapted_content }
49
+ end
50
+ end
51
+ end
52
+
53
+ desc 'Run all IPS benchmarks'
54
+ task :benchmark do
55
+ Dir['./benchmarks/*.rb'].sort.each { |file| require file }
56
+ end
57
+
58
+ namespace :benchmark do
59
+ desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
60
+ task :write_to_file do
61
+ $store_comparison_results = {}
62
+
63
+ Rake.application[:benchmark].invoke
64
+
65
+ File.open('BENCHMARK.md', 'w') do |f|
66
+ f.puts "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}",
67
+ '',
68
+ 'Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. '\
69
+ 'If the `rbtree` gem is present, `SortedSet` will [use it]'\
70
+ '(https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724)'\
71
+ ' and become even slower.',
72
+ ''
73
+
74
+ $store_comparison_results.each do |caption, result|
75
+ f.puts '```', caption, result.strip.gsub(/(same-ish).*$/, '\1').lines[1..-1], '```'
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ unless RUBY_PLATFORM =~ /java/
82
+ # recompile before benchmarking or running specs
83
+ task(:benchmark).enhance([:compile])
84
+ task(:spec).enhance([:compile])
85
+ end
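
The `./benchmarks/*.rb` files that the `:benchmark` task requires are not part of this diff. As a rough, hypothetical sketch of what one such file could look like (the labels, sizes and the use of `ImmutableSet.new` with an enumerable are assumptions, not the gem's actual benchmark code):

```ruby
# Hypothetical ./benchmarks/intersection.rb -- illustrative only.
require 'benchmark/ips'
require 'set'
require 'immutable_set'

items_a = (0...5_000_000).to_a
items_b = (2_500_000...7_500_000).to_a

gem_a    = ImmutableSet.new(items_a)
gem_b    = ImmutableSet.new(items_b)
stdlib_a = SortedSet.new(items_a)
stdlib_b = SortedSet.new(items_b)

Benchmark.ips do |x|
  x.report('gem')    { gem_a & gem_b }
  x.report('stdlib') { stdlib_a & stdlib_b }
  x.compare!
end

# The gem's real benchmark files presumably also record the comparison text in
# $store_comparison_results so that `rake benchmark:write_to_file` can dump it
# into BENCHMARK.md; that plumbing is omitted here.
```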
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'immutable_set'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ class S < SortedSet; end
14
+ class I < ImmutableSet; end
15
+ class P < ImmutableSet::Pure; end
16
+
17
+ require "irb"
18
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
+ require 'mkmf'
2
+
3
+ $CFLAGS << ' -Wextra -Wno-unused-parameter -Wall -pedantic '
4
+
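+ # defines HAVE_STRUCT_ST_TABLE_ENTRIES on Ruby >= 2.4; without it, the C extension compiles to a no-op (see immutable_set.c)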
5
+ have_struct_member('struct st_table', 'entries')
6
+
7
+ create_makefile('immutable_set/immutable_set')
@@ -0,0 +1,445 @@
1
+ // This speeds up set methods that take an enum by iterating both in parallel.
2
+ // Simple sanity checks and casting the arg to set are done in Ruby beforehand.
3
+ // Internal hashes of the recipient and arg must have been created in order.
4
+
5
+ #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES
6
+ // the optional extension doesn't work for ruby < 2.4, skip defining module
7
+ void Init_immutable_set() {}
8
+ #else
9
+
10
+ #include "ruby.h"
11
+ #include "ruby/st.h"
12
+
13
+ enum iter_state {A_LT_B = -1, A_EQ_B = 0, A_GT_B = 1, EOF_A = -2, EOF_B = -3};
14
+ enum iter_action {ITER_ADVANCE_A, ITER_ADVANCE_B, ITER_ADVANCE_BOTH, ITER_END};
15
+
16
+ typedef enum iter_state(*compare_function)(VALUE, VALUE);
17
+ typedef enum iter_action(*comp_callback)(enum iter_state, VALUE*);
18
+ typedef enum iter_action(*proc_callback)(enum iter_state, VALUE*, VALUE, VALUE);
19
+
20
+ static enum iter_state
21
+ compare_fixnum_values(VALUE a, VALUE b) {
22
+ if (a < b) return A_LT_B;
23
+ if (a > b) return A_GT_B;
24
+ return A_EQ_B;
25
+ }
26
+
27
+ static enum iter_state
28
+ compare_any_values(VALUE a, VALUE b) {
29
+ return rb_cmpint(rb_funcallv(a, rb_intern("<=>"), 1, &b), a, b);
30
+ }
31
+
32
+ #ifndef STRING_P
33
+ # define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
34
+ #endif
35
+
36
+ static compare_function
37
+ optimal_compare_function(VALUE set_a, VALUE set_b) {
38
+ VALUE max_a, max_b;
39
+
40
+ max_a = rb_iv_get(set_a, "@max");
41
+ max_b = rb_iv_get(set_b, "@max");
42
+
43
+ if (FIXNUM_P(max_a) && FIXNUM_P(max_b)) return compare_fixnum_values;
44
+ if (STRING_P(max_a) && STRING_P(max_b)) return rb_str_cmp;
45
+ return compare_any_values;
46
+ }
47
+
48
+ struct LOC_st_stable_entry {
49
+ st_index_t hash;
50
+ st_data_t key;
51
+ st_data_t record;
52
+ };
53
+
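+ // Returns a pointer to the ordered entries of the set's internal @hash table
+ // and writes the member count to size_ptr (relies on the Ruby >= 2.4 st_table layout).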
54
+ static struct LOC_st_stable_entry*
55
+ set_entries_ptr(VALUE set, st_index_t* size_ptr) {
56
+ VALUE hash;
57
+
58
+ hash = rb_iv_get(set, "@hash");
59
+ *size_ptr = RHASH_SIZE(hash);
60
+
61
+ return (struct LOC_st_stable_entry*)RHASH_TBL(hash)->entries;
62
+ }
63
+
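+ // Walks the ordered entries of two sets in lockstep. The callback decides, from
+ // how the current members compare (or from an EOF marker), which side to advance
+ // next and when to stop, accumulating its result in memo.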
64
+ #define PARALLEL_ITERATE(...) \
65
+ st_index_t size_a, size_b, i, j; \
66
+ compare_function compare_func; \
67
+ enum iter_state state; \
68
+ struct LOC_st_stable_entry *entries_a, *entries_b; \
69
+ VALUE entry_a, entry_b; \
70
+ \
71
+ entries_a = set_entries_ptr(set_a, &size_a); \
72
+ entries_b = set_entries_ptr(set_b, &size_b); \
73
+ if (!size_a || !size_b) return memo; \
74
+ \
75
+ i = j = 0; \
76
+ entry_a = entries_a[i].key; \
77
+ entry_b = entries_b[j].key; \
78
+ compare_func = optimal_compare_function(set_a, set_b); \
79
+ \
80
+ for (;;) { \
81
+ state = (*compare_func)(entry_a, entry_b); \
82
+ \
83
+ eval_state: \
84
+ switch((*callback)(state, __VA_ARGS__)) { \
85
+ case ITER_ADVANCE_A: \
86
+ if (++i >= size_a) { state = EOF_A; goto eval_state; } \
87
+ entry_a = entries_a[i].key; \
88
+ continue; \
89
+ case ITER_ADVANCE_B: \
90
+ if (++j >= size_b) { state = EOF_B; goto eval_state; } \
91
+ entry_b = entries_b[j].key; \
92
+ continue; \
93
+ case ITER_ADVANCE_BOTH: \
94
+ if (++i >= size_a) { state = EOF_A; goto eval_state; } \
95
+ entry_a = entries_a[i].key; \
96
+ if (++j >= size_b) { state = EOF_B; goto eval_state; } \
97
+ entry_b = entries_b[j].key; \
98
+ continue; \
99
+ case ITER_END: \
100
+ return memo; \
101
+ } \
102
+ } \
103
+
104
+ static VALUE
105
+ parallel_compare(VALUE set_a, VALUE set_b, comp_callback callback, VALUE memo) {
106
+ PARALLEL_ITERATE(&memo);
107
+ }
108
+
109
+ static VALUE
110
+ parallel_process(VALUE set_a, VALUE set_b, proc_callback callback, VALUE memo) {
111
+ PARALLEL_ITERATE(&memo, entry_a, entry_b);
112
+ }
113
+
114
+ static enum iter_action
115
+ check_first_subset_of_second(enum iter_state state, VALUE* memo) {
116
+ switch(state) {
117
+ case A_LT_B: *memo = Qfalse; break; // entry_a not in set_b
118
+ case A_EQ_B: return ITER_ADVANCE_BOTH;
119
+ case A_GT_B: return ITER_ADVANCE_B;
120
+ case EOF_A: *memo = Qtrue; break; // checked all in set_a
121
+ case EOF_B: *memo = Qfalse; break; // no more comparandi in set_b
122
+ }
123
+ return ITER_END;
124
+ }
125
+
126
+ // Returns Qtrue if SET_A is a subset (proper or not) of SET_B, else Qfalse.
127
+ static VALUE
128
+ method_subset_p(VALUE self, VALUE set_a, VALUE set_b) {
129
+ return parallel_compare(set_a, set_b, check_first_subset_of_second, Qfalse);
130
+ }
131
+
132
+ // Returns Qtrue if SET_A is a superset (proper or not) of SET_B, else Qfalse.
133
+ static VALUE
134
+ method_superset_p(VALUE self, VALUE set_a, VALUE set_b) {
135
+ return parallel_compare(set_b, set_a, check_first_subset_of_second, Qfalse);
136
+ }
137
+
138
+ // TODO: if (a > b max || b > a max) *memo = Qfalse; break; ?
139
+ static enum iter_action
140
+ check_if_intersect(enum iter_state state, VALUE* memo) {
141
+ switch(state) {
142
+ case A_LT_B: return ITER_ADVANCE_A;
143
+ case A_EQ_B: *memo = Qtrue; break; // found common member
144
+ case A_GT_B: return ITER_ADVANCE_B;
145
+ case EOF_A: *memo = Qfalse; break;
146
+ case EOF_B: *memo = Qfalse; break;
147
+ }
148
+ return ITER_END;
149
+ }
150
+
151
+ // Returns Qtrue if SET_A intersects with SET_B, else Qfalse.
152
+ static VALUE
153
+ method_intersect_p(VALUE self, VALUE set_a, VALUE set_b) {
154
+ return parallel_compare(set_a, set_b, check_if_intersect, Qfalse);
155
+ }
156
+
157
+ static void
158
+ set_max_ivar_for_set(VALUE set) {
159
+ struct LOC_st_stable_entry *entries;
160
+ st_index_t size;
161
+
162
+ entries = set_entries_ptr(set, &size);
163
+ if (size) rb_iv_set(set, "@max", entries[size - 1].key);
164
+ }
165
+
166
+ #define MEMO_HASH (memo[0])
167
+ #define MEMO_SET_A_DEPLETED (memo[1])
168
+ #define MEMO_SET_B_DEPLETED (memo[2])
169
+
170
+ // helper to process two sets and build a new one in parallel
171
+ static VALUE
172
+ parallel_build(VALUE set_a, VALUE set_b, proc_callback proc) {
173
+ VALUE new_set, new_hash, memo[3];
174
+
175
+ // prepare new Set
176
+ new_set = rb_class_new_instance(0, 0, RBASIC(set_a)->klass);
177
+ new_hash = rb_hash_new();
178
+ rb_iv_set(new_set, "@hash", new_hash);
179
+
180
+ MEMO_HASH = new_hash;
181
+ MEMO_SET_A_DEPLETED = 0;
182
+ MEMO_SET_B_DEPLETED = 0;
183
+
184
+ parallel_process(set_a, set_b, proc, (VALUE)memo);
185
+
186
+ set_max_ivar_for_set(new_set);
187
+ rb_obj_freeze(new_hash);
188
+
189
+ return new_set;
190
+ }
191
+
192
+ static enum iter_action
193
+ add_shared_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
194
+ VALUE *memo;
195
+
196
+ switch(state) {
197
+ case A_LT_B: return ITER_ADVANCE_A;
198
+ case A_EQ_B:
199
+ memo = (VALUE*)*memp;
200
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
201
+ return ITER_ADVANCE_BOTH;
202
+ case A_GT_B: return ITER_ADVANCE_B;
203
+ case EOF_A: break;
204
+ case EOF_B: break;
205
+ }
206
+ return ITER_END;
207
+ }
208
+
209
+ // Returns a new set containing all members shared by SET_A and SET_B.
210
+ static VALUE
211
+ method_intersection(VALUE self, VALUE set_a, VALUE set_b) {
212
+ return parallel_build(set_a, set_b, add_shared_to_hash);
213
+ }
214
+
215
+ static enum iter_action
216
+ add_any_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
217
+ VALUE *memo = (VALUE*)*memp;
218
+
219
+ switch(state) {
220
+ case A_LT_B:
221
+ if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
222
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
223
+ return ITER_ADVANCE_B;
224
+ }
225
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
226
+ return ITER_ADVANCE_A;
227
+ case A_EQ_B:
228
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
229
+ return ITER_ADVANCE_BOTH; // shared member
230
+ case A_GT_B:
231
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
232
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
233
+ return ITER_ADVANCE_A;
234
+ }
235
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
236
+ return ITER_ADVANCE_B;
237
+ case EOF_A:
238
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
239
+ MEMO_SET_A_DEPLETED = 1;
240
+ if (MEMO_SET_B_DEPLETED) break; // break if both sets depleted
241
+ return ITER_ADVANCE_B;
242
+ case EOF_B:
243
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
244
+ MEMO_SET_B_DEPLETED = 1;
245
+ if (MEMO_SET_A_DEPLETED) break; // break if both sets depleted
246
+ return ITER_ADVANCE_A;
247
+ }
248
+ return ITER_END;
249
+ }
250
+
251
+ // Returns a new set that includes all members of SET_A and/or SET_B.
252
+ static VALUE
253
+ method_union(VALUE self, VALUE set_a, VALUE set_b) {
254
+ return parallel_build(set_a, set_b, add_any_members_to_hash);
255
+ }
256
+
257
+ #define INSERT_UNLESS_EQUAL(val, other, hsh) \
258
+ if (compare_any_values(val, other)) { st_insert(RHASH_TBL(hsh), val, Qtrue); }
259
+
260
+ static enum iter_action
261
+ add_nonb_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
262
+ VALUE *memo = (VALUE*)*memp;
263
+
264
+ switch(state) {
265
+ case A_LT_B:
266
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
267
+ return ITER_ADVANCE_A;
268
+ case A_EQ_B:
269
+ return ITER_ADVANCE_BOTH; // shared member
270
+ case A_GT_B:
271
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
272
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
273
+ return ITER_ADVANCE_A;
274
+ }
275
+ return ITER_ADVANCE_B;
276
+ case EOF_A:
277
+ // if set b is also depleted, add a unless equal to final b
278
+ if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); }
279
+ break;
280
+ case EOF_B:
281
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue); // the current a is > the final b, so it belongs in the difference
+ MEMO_SET_B_DEPLETED = 1;
282
+ return ITER_ADVANCE_A;
283
+ }
284
+ return ITER_END;
285
+ }
286
+
287
+ // Returns a new set with all members of SET_A that are not in SET_B.
288
+ static VALUE
289
+ method_difference(VALUE self, VALUE set_a, VALUE set_b) {
290
+ return parallel_build(set_a, set_b, add_nonb_members_to_hash);
291
+ }
292
+
293
+ static enum iter_action
294
+ add_xor_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
295
+ VALUE *memo = (VALUE*)*memp;
296
+
297
+ switch(state) {
298
+ case A_LT_B:
299
+ if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
300
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
301
+ return ITER_ADVANCE_B;
302
+ }
303
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
304
+ return ITER_ADVANCE_A;
305
+ case A_EQ_B:
306
+ return ITER_ADVANCE_BOTH; // shared member, skip
307
+ case A_GT_B:
308
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
309
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
310
+ return ITER_ADVANCE_A;
311
+ }
312
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
313
+ return ITER_ADVANCE_B;
314
+ case EOF_A:
315
+ // if set b is also depleted, add a unless equal to final b and break
316
+ if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); break; }
317
+ INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); // add b unless equal to final a
318
+ MEMO_SET_A_DEPLETED = 1; // mark set a as depleted
319
+ return ITER_ADVANCE_B;
320
+ case EOF_B:
321
+ // if set a is also depleted, add b unless equal to final a and break
322
+ if (MEMO_SET_A_DEPLETED) { INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); break; }
323
+ INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); // add a unless equal to final b
324
+ MEMO_SET_B_DEPLETED = 1; // mark set b as depleted
325
+ return ITER_ADVANCE_A;
326
+ }
327
+ return ITER_END;
328
+ }
329
+
330
+ // Returns a new set that is a XOR result of SET_A and SET_B.
331
+ static VALUE
332
+ method_exclusion(VALUE self, VALUE set_a, VALUE set_b) {
333
+ return parallel_build(set_a, set_b, add_xor_members_to_hash);
334
+ }
335
+
336
+ #define INCR_FIXNUM_ID(id) (id += 2)
337
+ #define DECR_FIXNUM_ID(id) (id -= 2)
338
+
339
+ #define GET_RANGE_FIXNUM_IDS(range, from_id, upto_id) \
340
+ int excl; \
341
+ if (!rb_range_values(range, &from_id, &upto_id, &excl)) { \
342
+ rb_raise(rb_eArgError, "Pass a Range"); \
343
+ } \
344
+ if (excl) DECR_FIXNUM_ID(upto_id); \
345
+ Check_Type(from_id, T_FIXNUM); \
346
+ Check_Type(upto_id, T_FIXNUM);
347
+
348
+ // Fills HASH with all Fixnums in RANGE.
349
+ static VALUE
350
+ method_fill_with_fixnums(VALUE self, VALUE hash, VALUE range) {
351
+ VALUE from_id, upto_id;
352
+ st_table *tbl;
353
+
354
+ GET_RANGE_FIXNUM_IDS(range, from_id, upto_id);
355
+ tbl = RHASH_TBL(hash);
356
+
357
+ while (from_id <= upto_id) {
358
+ st_insert(tbl, from_id, Qtrue);
359
+ INCR_FIXNUM_ID(from_id);
360
+ }
361
+
362
+ return upto_id;
363
+ }
364
+
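+ // When ucp_only is set, ids strictly between 0x1B000 and 0x1C000 are skipped;
+ // as Fixnum VALUEs (2n + 1) these encode the codepoints 0xD800..0xDFFF,
+ // i.e. the UTF-16 surrogate range.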
365
+ inline static void
366
+ insert_fixnum_id(st_table *tbl, VALUE id, int ucp_only) {
367
+ if (!ucp_only || id <= 0x1B000 || id >= 0x1C000) {
368
+ st_insert(tbl, id, Qtrue);
369
+ }
370
+ }
371
+
372
+ // Returns a new set with all Fixnums in RANGE that are not in SET.
373
+ static VALUE
374
+ method_invert_fixnum_set(VALUE self, VALUE set, VALUE range, VALUE ucp) {
375
+ VALUE fixnum_id, upto_id, new_hash, new_set, entry;
376
+ st_index_t size, i;
377
+ int ucp_only;
378
+ st_table *new_tbl;
379
+ struct LOC_st_stable_entry *entries;
380
+
381
+ GET_RANGE_FIXNUM_IDS(range, fixnum_id, upto_id);
382
+ ucp_only = ucp != Qfalse && ucp != Qnil && ucp != Qundef;
383
+
384
+ // get set members
385
+ entries = set_entries_ptr(set, &size);
386
+
387
+ // prepare new Set
388
+ new_set = rb_class_new_instance(0, 0, RBASIC(set)->klass);
389
+ new_hash = rb_hash_new();
390
+ new_tbl = RHASH_TBL(new_hash);
391
+ rb_iv_set(new_set, "@hash", new_hash);
392
+
393
+ if (size) {
394
+ i = 0;
395
+ entry = entries[i].key;
396
+
397
+ // here is the optimization: skipping unneeded comparisons with lower values
398
+ for (;;) {
399
+ if (fixnum_id == entry) {
400
+ // fixnum_id is in set, compare next fixnum with next set member
401
+ INCR_FIXNUM_ID(fixnum_id);
402
+ if (++i == size || fixnum_id > upto_id) break;
403
+ entry = entries[i].key; // read the next entry only after the bounds check
404
+ }
405
+ else if (fixnum_id < entry) {
406
+ // fixnum_id is not in set, include in inversion
407
+ insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
408
+ INCR_FIXNUM_ID(fixnum_id);
409
+ if (fixnum_id > upto_id) break;
410
+ }
411
+ else /* if (fixnum_id > entry) */ {
412
+ // gap; fixnum_id might be in set, check next set member
413
+ if (++i == size) break;
414
+ entry = entries[i].key; // read the next entry only after the bounds check
415
+ }
416
+ }
417
+ }
418
+
419
+ // include all fixnums beyond the range of the set
420
+ while (fixnum_id <= upto_id) {
421
+ insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
422
+ INCR_FIXNUM_ID(fixnum_id);
423
+ }
424
+
425
+ set_max_ivar_for_set(new_set);
426
+ rb_obj_freeze(new_hash);
427
+
428
+ return new_set;
429
+ }
430
+
431
+ void Init_immutable_set() {
432
+ VALUE mod;
433
+ mod = rb_define_module("ImmutableSetExt");
434
+ rb_define_singleton_method(mod, "difference", method_difference, 2);
435
+ rb_define_singleton_method(mod, "exclusion", method_exclusion, 2);
436
+ rb_define_singleton_method(mod, "fill_with_fixnums", method_fill_with_fixnums, 2);
437
+ rb_define_singleton_method(mod, "intersect?", method_intersect_p, 2);
438
+ rb_define_singleton_method(mod, "intersection", method_intersection, 2);
439
+ rb_define_singleton_method(mod, "invert_fixnum_set", method_invert_fixnum_set, 3);
440
+ rb_define_singleton_method(mod, "subset?", method_subset_p, 2);
441
+ rb_define_singleton_method(mod, "superset?", method_superset_p, 2);
442
+ rb_define_singleton_method(mod, "union", method_union, 2);
443
+ }
444
+
445
+ #endif // end of #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES ... #else ...
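
The gem's `lib/` Ruby files are not included in this diff. Going only by the extension's opening comment (sanity checks and casting happen in Ruby first) and the singleton methods it registers on `ImmutableSetExt`, a wrapper could look roughly like the following hypothetical sketch; it is not the gem's actual code:

```ruby
# Hypothetical glue code -- the method name and the short-circuit are assumptions.
def immutable_intersection(set, enum)
  other = enum.is_a?(ImmutableSet) ? enum : set.class.new(enum) # cast the arg to a set
  return set.class.new if set.distinct_bounds?(other)           # cheap #minmax short-circuit
  ImmutableSetExt.intersection(set, other)                      # ordered parallel iteration in C
end
```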