immutable_set 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 142c33ab1a414a70f1d310c511174e4ca1491c25b73062057595b93412f4be54
4
+ data.tar.gz: 8b2bf71c740c2c1273c33524a0d9b5465a3bbfdc373079dd70025ec4da4a3804
5
+ SHA512:
6
+ metadata.gz: a1f80fe0c093925ed2012facf812be01d065e164199adb4d03e15ae311ab3c34d0a705fc86d90b9d187b9f3519bf3076e65199e566ea6f7b9fdd6b7c0f647871
7
+ data.tar.gz: 59628f65aad7609e958b1f8fa444b5d855dc87a66d5801ef74b099bb34dd76bcf9ce3897bcead01e5f0d73c38abe7c10f37da5498487bb7d37abb66e100481f8
@@ -0,0 +1,31 @@
1
+ *.bundle
2
+ *.gem
3
+ *.iml
4
+ *.stTheme.cache
5
+ *.sublime-project
6
+ *.sublime-workspace
7
+ *.swp
8
+ *.tmlanguage.cache
9
+ *.tmPreferences.cache
10
+ *~
11
+ .byebug_history
12
+ .DS_Store
13
+ .idea/
14
+ .ruby-gemset
15
+ .ruby-version
16
+ .tags
17
+ .tags1
18
+ bbin/
19
+ binstubs/*
20
+ bundler_stubs/*/.yardoc
21
+ Gemfile.lock
22
+ /.bundle/
23
+ /_yardoc/
24
+ /coverage/
25
+ /doc/
26
+ /pkg/
27
+ /spec/reports/
28
+ /tmp/
29
+
30
+ # rspec failure tracking
31
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ before_install:
9
+ - gem update --system
10
+ - gem install bundler
@@ -0,0 +1,131 @@
1
+ Results of `rake benchmark` on ruby 2.5.1p57 (2018-03-29 revision 63029) [x86_64-darwin17]
2
+
3
+ Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. If the `rbtree` gem is present, `SortedSet` will [use it](https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724) and become even slower.
4
+
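+ For context, numbers like these are produced by `benchmark-ips` comparisons roughly along the following lines (a minimal sketch, not one of the actual scripts, which live in `./benchmarks` and use larger inputs):
+
+ ```ruby
+ require 'benchmark/ips'
+ require 'set'
+ require 'immutable_set'
+
+ items     = (1..500_000).to_a  # far smaller than 5M, to keep the sketch quick
+ immutable = ImmutableSet.new(items)
+ sorted    = SortedSet.new(items)
+
+ Benchmark.ips do |x|
+   x.report('gem')    { immutable & immutable }
+   x.report('stdlib') { sorted & sorted }
+   x.compare!
+ end
+ ```
+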
5
+ ```
6
+ #- with 5M overlapping items
7
+ gem: 6.6 i/s
8
+ gem w/o c: 0.8 i/s - 7.85x slower
9
+ stdlib: 0.7 i/s - 9.51x slower
+ ```
10
+ ```
11
+ #- with 5M distinct items
12
+ gem: 1429392.7 i/s
13
+ gem w/o c: 1414260.7 i/s - same-ish
14
+ stdlib: 1.0 i/s - 1456728.62x slower
+ ```
15
+ ```
16
+ #^ with 5M overlapping items
17
+ gem: 0.9 i/s
18
+ gem w/o C: 0.4 i/s - 2.12x slower
19
+ stdlib: 0.4 i/s - 2.16x slower
20
+ ```
21
+ ```
22
+ #^ with 5M distinct items
23
+ gem w/o C: 0.8 i/s
24
+ gem: 0.6 i/s - 1.25x slower
25
+ stdlib: 0.5 i/s - 1.65x slower
26
+ ```
27
+ ```
28
+ #intersect? with 5M intersecting items
29
+ gem: 266.8 i/s
30
+ gem w/o C: 8.2 i/s - 32.53x slower
31
+ stdlib: 2.2 i/s - 121.88x slower
32
+ ```
33
+ ```
34
+ #intersect? with 5M sparse items (rare case?)
35
+ gem w/o C: 1442.5 i/s
36
+ gem: 185.2 i/s - 7.79x slower
37
+ stdlib: 2.0 i/s - 712.75x slower
38
+ ```
39
+ ```
40
+ #intersect? with 5M distinct items
41
+ gem: 1376038.3 i/s
42
+ gem w/o C: 1375048.5 i/s - same-ish
43
+ stdlib: 2.0 i/s - 675307.67x slower
44
+ ```
45
+ ```
46
+ #& with 5M intersecting items
47
+ gem: 6.4 i/s
48
+ gem w/o C: 2.6 i/s - 2.49x slower
49
+ Array#&: 1.3 i/s - 4.83x slower
50
+ stdlib: 0.9 i/s - 6.90x slower
51
+ ```
52
+ ```
53
+ #& with 5M sparse items (rare case?)
54
+ gem: 88.3 i/s
55
+ gem w/o C: 19.6 i/s - 4.50x slower
56
+ stdlib: 2.0 i/s - 44.46x slower
57
+ Array#&: 1.8 i/s - 49.61x slower
58
+ ```
59
+ ```
60
+ #& with 5M distinct items
61
+ gem w/o C: 578891.9 i/s
62
+ gem: 571604.2 i/s - same-ish
63
+ stdlib: 2.1 i/s - 281016.75x slower
64
+ Array#&: 1.8 i/s - 316493.80x slower
65
+ ```
66
+ ```
67
+ #inversion with 5M items
68
+ gem: 1.8 i/s
69
+ gem w/o C: 0.7 i/s - 2.58x slower
70
+ stdlib #-: 0.3 i/s - 6.67x slower
71
+ ```
72
+ ```
73
+ #inversion with 100k items
74
+ gem: 239.5 i/s
75
+ gem w/o C: 62.8 i/s - 3.81x slower
76
+ stdlib #-: 29.2 i/s - 8.22x slower
77
+ ```
78
+ ```
79
+ #minmax with 10M items
80
+ gem: 3180102.2 i/s
81
+ gem w/o C: 3170355.3 i/s - same-ish
82
+ stdlib: 5.3 i/s - 595743.46x slower
83
+ ```
84
+ ```
85
+ #minmax with 1M items
86
+ gem: 3247178.7 i/s
87
+ gem w/o C: 3231669.0 i/s - same-ish
88
+ stdlib: 52.8 i/s - 61535.19x slower
89
+ ```
90
+ ```
91
+ ::new with 5M Range items
92
+ gem: 0.8 i/s
93
+ gem w/o C: 0.6 i/s - 1.27x slower
94
+ stdlib: 0.4 i/s - 1.78x slower
95
+ ```
96
+ ```
97
+ ::new with 100k Range items
98
+ gem: 126.7 i/s
99
+ gem w/o C: 69.2 i/s - 1.83x slower
100
+ stdlib: 33.1 i/s - 3.83x slower
101
+ ```
102
+ ```
103
+ ::new with 10k Range items in 10 non-continuous Ranges
104
+ gem: 3117.6 i/s
105
+ gem w/o C: 1326.2 i/s - 2.35x slower
106
+ stdlib: 666.7 i/s - 4.68x slower
107
+ ```
108
+ ```
109
+ #(proper_)subset/superset? with 5M subset items
110
+ gem: 50.8 i/s
111
+ gem w/o C: 1.4 i/s - 37.61x slower
112
+ stdlib: 1.3 i/s - 37.71x slower
113
+ ```
114
+ ```
115
+ #(proper_)subset/superset? with 5M overlapping items
116
+ gem: 51.0 i/s
117
+ gem w/o C: 1.4 i/s - 36.49x slower
118
+ stdlib: 1.4 i/s - 36.74x slower
119
+ ```
120
+ ```
121
+ #(proper_)subset/superset? with 100k overlapping items
122
+ gem: 3238.3 i/s
123
+ stdlib: 302.9 i/s - 10.69x slower
124
+ gem w/o C: 281.8 i/s - 11.49x slower
125
+ ```
126
+ ```
127
+ #+ with 5M overlapping items
128
+ gem: 1.4 i/s
129
+ stdlib: 1.2 i/s - 1.19x slower
130
+ gem w/o C: 0.9 i/s - 1.49x slower
131
+ ```
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in immutable_set.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Jannosch Müller
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,83 @@
1
+ [![Gem Version](https://badge.fury.io/rb/immutable_set.svg)](http://badge.fury.io/rb/immutable_set)
2
+ [![Build Status](https://travis-ci.org/janosch-x/immutable_set.svg?branch=master)](https://travis-ci.org/janosch-x/immutable_set)
3
+
4
+ # ImmutableSet
5
+
6
+ A faster, immutable replacement for Ruby's [`Set`](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html).
7
+
8
+ On Ruby >= 2.4, all operations are faster, some by several orders of magnitude (see [benchmarks](./BENCHMARK.md)).
9
+
10
+ #### Useful for ...
11
+
12
+ - creating and working with large sorted sets
13
+ - intersecting, merging, diffing, checking for subsets etc.
14
+ - the [advantages of immutability](https://hackernoon.com/f98e7e85b6ac)
15
+
16
+ #### Not useful for ...
17
+
18
+ - small sets and other cases where performance is negligible
19
+ - sets with mixed members or any members that are not mutually comparable
20
+ - doing a lot of adding, removing, and checking of single items
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ require 'immutable_set'
26
+
27
+ class MySet < ImmutableSet; end
28
+ ```
29
+
30
+ Mutating methods of `Set` (e.g. `#add`, `#delete`) are removed. They can be substituted like this if needed:
31
+
32
+ ```ruby
33
+ set1 = MySet[1, 2, 3]
34
+ set1 += MySet[4] # => MySet[1, 2, 3, 4]
35
+ set1 -= MySet[3] # => MySet[1, 2, 4]
36
+ ```
37
+
38
+ Immutability is required for most of the [performance optimizations](#performance-optimizations).
39
+
40
+ All other methods behave as in `Set`/`SortedSet`, so see the [official documentation](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html) for details about what they do.
41
+
42
+ ## New methods
43
+
44
+ **#distinct_bounds?**
45
+
46
+ Returns true iff the passed set lies entirely outside the `#minmax` boundaries of `self`.
47
+
48
+ ```ruby
49
+ MySet[2, 4].distinct_bounds?(MySet[3]) # => false
50
+ MySet[2, 4].distinct_bounds?(MySet[5]) # => true
51
+ ```
52
+
53
+ **::from_ranges**
54
+
55
+ Returns a set built from all passed `Ranges`.
56
+
57
+ ```ruby
58
+ MySet.from_ranges(2..4, 6..8) # => MySet[2, 3, 4, 6, 7, 8]
59
+ ```
60
+
61
+ **#inversion**
62
+
63
+ Returns a new set of the same class, containing all members of `from`..`upto` that are not in `self`. Faster than `Set.new(from..upto) - self`.
64
+
65
+ ```ruby
66
+ MySet[3, 5].inversion(from: 1, upto: 4) # => MySet[1, 2, 4]
67
+ MySet['c'].inversion(from: 'a', upto: 'd') # => MySet['a', 'b', 'd']
68
+ ```
69
+
70
+ ## Performance optimizations
71
+
72
+ The cost of many methods is reduced from O(m*n) to O(m+n) or better. The underlying ideas are:
73
+
74
+ - never needing to sort, because the internal `@hash` is built in order and then frozen
75
+ - remembering `#max` cheaply whenever possible
76
+ - this allows skipping unneeded checks for members outside its own `#minmax` boundaries
77
+ - avoiding unneeded lookups during comparisons by iterating over both sets in parallel in C (sketched in Ruby after this list)
78
+ - parallel iteration can skip over gaps in either set since both hashes are ordered
79
+ - when using Ruby, preferring `while` loops over slower, scope-building iteration methods
80
+
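+ To illustrate the parallel iteration in plain Ruby (the gem's real implementation works on the internal hashes in C), intersecting two sorted, duplicate-free lists takes a single pass and no per-member hash lookups:
+
+ ```ruby
+ # Illustration only: a and b are assumed to be sorted and duplicate-free,
+ # just like the keys of ImmutableSet's internal @hash.
+ def sorted_intersection(a, b)
+   result = []
+   i = j = 0
+   while i < a.size && j < b.size
+     case a[i] <=> b[j]
+     when -1 then i += 1 # a[i] cannot be in b, skip it
+     when 1  then j += 1 # b[j] cannot be in a, skip it
+     else                # equal: shared member
+       result << a[i]
+       i += 1
+       j += 1
+     end
+   end
+   result
+ end
+
+ sorted_intersection([1, 3, 5, 7], [2, 3, 4, 7, 9]) # => [3, 7]
+ ```
+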
81
+ ## Benchmarks
82
+
83
+ Run `rake benchmark` or see [BENCHMARK.md](./BENCHMARK.md).
@@ -0,0 +1,85 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
7
+
8
+ require 'rake/extensiontask'
9
+
10
+ Rake::ExtensionTask.new('immutable_set') do |ext|
11
+ ext.lib_dir = 'lib/immutable_set'
12
+ end
13
+
14
+ desc 'Download relevant ruby/spec tests, adapt to ImmutableSet and its variants'
15
+ task :sync_ruby_spec do
16
+ require 'fileutils'
17
+
18
+ variants = {
19
+ 'ImmutableSet' => './spec/ruby-spec/library/immutable_set',
20
+ 'ImmutableSet::Pure' => './spec/ruby-spec/library/immutable_set_pure',
21
+ }
22
+ variants.each { |_, dir| FileUtils.rm_rf(dir) if File.exist?(dir) }
23
+
24
+ base_dir = variants.first[1]
25
+ `svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{base_dir}`
26
+
27
+ variants.each_value { |dir| FileUtils.copy_entry(base_dir, dir) unless dir == base_dir }
28
+
29
+ variants.each.with_index do |(class_name, dir), i|
30
+ Dir["#{dir}/**/*.rb"].each do |spec|
31
+ if spec =~ %r{/(add|append|case|clear|collect|delete|filter|flatten|
32
+ initialize|keep_if|map|merge|replace|reject|select|subtract)}x
33
+ File.delete(spec)
34
+ next
35
+ end
36
+
37
+ # `i` must be added to shared example names or they'll override each other
38
+ adapted_content =
39
+ File
40
+ .read(spec)
41
+ .gsub('SortedSet', class_name)
42
+ .gsub('sorted_set_', "sorted_set_#{i}_")
43
+ .gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |method|')
44
+ .gsub('@method', 'method')
45
+ .gsub(/be_(false|true)/, 'be \1')
46
+ .gsub('mock', 'double')
47
+
48
+ File.open(spec, 'w') { |f| f.puts adapted_content }
49
+ end
50
+ end
51
+ end
52
+
53
+ desc 'Run all IPS benchmarks'
54
+ task :benchmark do
55
+ Dir['./benchmarks/*.rb'].sort.each { |file| require file }
56
+ end
57
+
58
+ namespace :benchmark do
59
+ desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
60
+ task :write_to_file do
61
+ $store_comparison_results = {}
62
+
63
+ Rake.application[:benchmark].invoke
64
+
65
+ File.open('BENCHMARK.md', 'w') do |f|
66
+ f.puts "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}",
67
+ '',
68
+ 'Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. '\
69
+ 'If the `rbtree` gem is present, `SortedSet` will [use it]'\
70
+ '(https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724)'\
71
+ ' and become even slower.',
72
+ ''
73
+
74
+ $store_comparison_results.each do |caption, result|
75
+ f.puts '```', caption, result.strip.gsub(/(same-ish).*$/, '\1').lines[1..-1], '```'
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ unless RUBY_PLATFORM =~ /java/
82
+ # recompile before benchmarking or running specs
83
+ task(:benchmark).enhance([:compile])
84
+ task(:spec).enhance([:compile])
85
+ end
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'immutable_set'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ class S < SortedSet; end
14
+ class I < ImmutableSet; end
15
+ class P < ImmutableSet::Pure; end
16
+
17
+ require "irb"
18
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
+ require 'mkmf'
2
+
3
+ $CFLAGS << ' -Wextra -Wno-unused-parameter -Wall -pedantic '
4
+
5
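+ # defines HAVE_STRUCT_ST_TABLE_ENTRIES when this ruby's st_table exposes its
+ # ordered entries array (ruby >= 2.4); without it, the C source compiles to a no-op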
+ have_struct_member('struct st_table', 'entries')
6
+
7
+ create_makefile('immutable_set/immutable_set')
@@ -0,0 +1,445 @@
1
+ // This speeds up set methods that take an enum by iterating both in parallel.
2
+ // Simple sanity checks and casting the arg to set are done in Ruby beforehand.
3
+ // Internal hashes of the recipient and arg must have been created in order.
4
+
5
+ #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES
6
+ // the optional extension doesn't work for ruby < 2.4, skip defining module
7
+ void Init_immutable_set() {}
8
+ #else
9
+
10
+ #include "ruby.h"
11
+ #include "ruby/st.h"
12
+
13
+ enum iter_state {A_LT_B = -1, A_EQ_B = 0, A_GT_B = 1, EOF_A = -2, EOF_B = -3};
14
+ enum iter_action {ITER_ADVANCE_A, ITER_ADVANCE_B, ITER_ADVANCE_BOTH, ITER_END};
15
+
16
+ typedef enum iter_state(*compare_function)(VALUE, VALUE);
17
+ typedef enum iter_action(*comp_callback)(enum iter_state, VALUE*);
18
+ typedef enum iter_action(*proc_callback)(enum iter_state, VALUE*, VALUE, VALUE);
19
+
20
+ static enum iter_state
21
+ compare_fixnum_values(VALUE a, VALUE b) {
22
+ if (a < b) return A_LT_B;
23
+ if (a > b) return A_GT_B;
24
+ return A_EQ_B;
25
+ }
26
+
27
+ static enum iter_state
28
+ compare_any_values(VALUE a, VALUE b) {
29
+ return rb_cmpint(rb_funcallv(a, rb_intern("<=>"), 1, &b), a, b);
30
+ }
31
+
32
+ #ifndef STRING_P
33
+ # define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
34
+ #endif
35
+
36
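+ // Picks the cheapest comparator both sets support, judging by the class of their
+ // @max members: raw Fixnum comparison, rb_str_cmp for Strings, or generic <=> dispatch.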
+ static compare_function
37
+ optimal_compare_function(VALUE set_a, VALUE set_b) {
38
+ VALUE max_a, max_b;
39
+
40
+ max_a = rb_iv_get(set_a, "@max");
41
+ max_b = rb_iv_get(set_b, "@max");
42
+
43
+ if (FIXNUM_P(max_a) && FIXNUM_P(max_b)) return compare_fixnum_values;
44
+ if (STRING_P(max_a) && STRING_P(max_b)) return (compare_function)rb_str_cmp;
45
+ return compare_any_values;
46
+ }
47
+
48
+ struct LOC_st_stable_entry {
49
+ st_index_t hash;
50
+ st_data_t key;
51
+ st_data_t record;
52
+ };
53
+
54
+ static struct LOC_st_stable_entry*
55
+ set_entries_ptr(VALUE set, st_index_t* size_ptr) {
56
+ VALUE hash;
57
+
58
+ hash = rb_iv_get(set, "@hash");
59
+ *size_ptr = RHASH_SIZE(hash);
60
+
61
+ return (struct LOC_st_stable_entry*)RHASH_TBL(hash)->entries;
62
+ }
63
+
64
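+ // Walks the ordered entries of both sets in lockstep: each round compares the current
+ // keys, hands the result to the callback, and advances set a, set b, or both as the
+ // callback requests, until it returns ITER_END (empty inputs short-circuit to memo).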
+ #define PARALLEL_ITERATE(...) \
65
+ st_index_t size_a, size_b, i, j; \
66
+ compare_function compare_func; \
67
+ enum iter_state state; \
68
+ struct LOC_st_stable_entry *entries_a, *entries_b; \
69
+ VALUE entry_a, entry_b; \
70
+ \
71
+ entries_a = set_entries_ptr(set_a, &size_a); \
72
+ entries_b = set_entries_ptr(set_b, &size_b); \
73
+ if (!size_a || !size_b) return memo; \
74
+ \
75
+ i = j = 0; \
76
+ entry_a = entries_a[i].key; \
77
+ entry_b = entries_b[j].key; \
78
+ compare_func = optimal_compare_function(set_a, set_b); \
79
+ \
80
+ for (;;) { \
81
+ state = (*compare_func)(entry_a, entry_b); \
82
+ \
83
+ eval_state: \
84
+ switch((*callback)(state, __VA_ARGS__)) { \
85
+ case ITER_ADVANCE_A: \
86
+ if (++i >= size_a) { state = EOF_A; goto eval_state; } \
87
+ entry_a = entries_a[i].key; \
88
+ continue; \
89
+ case ITER_ADVANCE_B: \
90
+ if (++j >= size_b) { state = EOF_B; goto eval_state; } \
91
+ entry_b = entries_b[j].key; \
92
+ continue; \
93
+ case ITER_ADVANCE_BOTH: \
94
+ if (++i >= size_a) { state = EOF_A; goto eval_state; } \
95
+ entry_a = entries_a[i].key; \
96
+ if (++j >= size_b) { state = EOF_B; goto eval_state; } \
97
+ entry_b = entries_b[j].key; \
98
+ continue; \
99
+ case ITER_END: \
100
+ return memo; \
101
+ } \
102
+ } \
103
+
104
+ static VALUE
105
+ parallel_compare(VALUE set_a, VALUE set_b, comp_callback callback, VALUE memo) {
106
+ PARALLEL_ITERATE(&memo);
107
+ }
108
+
109
+ static VALUE
110
+ parallel_process(VALUE set_a, VALUE set_b, proc_callback callback, VALUE memo) {
111
+ PARALLEL_ITERATE(&memo, entry_a, entry_b);
112
+ }
113
+
114
+ static enum iter_action
115
+ check_first_subset_of_second(enum iter_state state, VALUE* memo) {
116
+ switch(state) {
117
+ case A_LT_B: *memo = Qfalse; break; // entry_a not in set_b
118
+ case A_EQ_B: return ITER_ADVANCE_BOTH;
119
+ case A_GT_B: return ITER_ADVANCE_B;
120
+ case EOF_A: *memo = Qtrue; break; // checked all in set_a
121
+ case EOF_B: *memo = Qfalse; break; // no more comparandi in set_b
122
+ }
123
+ return ITER_END;
124
+ }
125
+
126
+ // Returns Qtrue if SET_A is a subset (proper or not) of SET_B, else Qfalse.
127
+ static VALUE
128
+ method_subset_p(VALUE self, VALUE set_a, VALUE set_b) {
129
+ return parallel_compare(set_a, set_b, check_first_subset_of_second, Qfalse);
130
+ }
131
+
132
+ // Returns Qtrue if SET_A is a superset (proper or not) of SET_B, else Qfalse.
133
+ static VALUE
134
+ method_superset_p(VALUE self, VALUE set_a, VALUE set_b) {
135
+ return parallel_compare(set_b, set_a, check_first_subset_of_second, Qfalse);
136
+ }
137
+
138
+ // TODO: if (a > b max || b > a max) *memo = Qfalse; break; ?
139
+ static enum iter_action
140
+ check_if_intersect(enum iter_state state, VALUE* memo) {
141
+ switch(state) {
142
+ case A_LT_B: return ITER_ADVANCE_A;
143
+ case A_EQ_B: *memo = Qtrue; break; // found common member
144
+ case A_GT_B: return ITER_ADVANCE_B;
145
+ case EOF_A: *memo = Qfalse; break;
146
+ case EOF_B: *memo = Qfalse; break;
147
+ }
148
+ return ITER_END;
149
+ }
150
+
151
+ // Returns Qtrue if SET_A intersects with SET_B, else Qfalse.
152
+ static VALUE
153
+ method_intersect_p(VALUE self, VALUE set_a, VALUE set_b) {
154
+ return parallel_compare(set_a, set_b, check_if_intersect, Qfalse);
155
+ }
156
+
157
+ static void
158
+ set_max_ivar_for_set(VALUE set) {
159
+ struct LOC_st_stable_entry *entries;
160
+ st_index_t size;
161
+
162
+ entries = set_entries_ptr(set, &size);
163
+ if (size) rb_iv_set(set, "@max", entries[size - 1].key);
164
+ }
165
+
166
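+ // layout of the VALUE memo[3] shared by parallel_build and its proc callbacks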
+ #define MEMO_HASH (memo[0])
167
+ #define MEMO_SET_A_DEPLETED (memo[1])
168
+ #define MEMO_SET_B_DEPLETED (memo[2])
169
+
170
+ // helper to process two sets and build a new one in parallel
171
+ static VALUE
172
+ parallel_build(VALUE set_a, VALUE set_b, proc_callback proc) {
173
+ VALUE new_set, new_hash, memo[3];
174
+
175
+ // prepare new Set
176
+ new_set = rb_class_new_instance(0, 0, RBASIC(set_a)->klass);
177
+ new_hash = rb_hash_new();
178
+ rb_iv_set(new_set, "@hash", new_hash);
179
+
180
+ MEMO_HASH = new_hash;
181
+ MEMO_SET_A_DEPLETED = 0;
182
+ MEMO_SET_B_DEPLETED = 0;
183
+
184
+ parallel_process(set_a, set_b, proc, (VALUE)memo);
185
+
186
+ set_max_ivar_for_set(new_set);
187
+ rb_obj_freeze(new_hash);
188
+
189
+ return new_set;
190
+ }
191
+
192
+ static enum iter_action
193
+ add_shared_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
194
+ VALUE *memo;
195
+
196
+ switch(state) {
197
+ case A_LT_B: return ITER_ADVANCE_A;
198
+ case A_EQ_B:
199
+ memo = (VALUE*)*memp;
200
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
201
+ return ITER_ADVANCE_BOTH;
202
+ case A_GT_B: return ITER_ADVANCE_B;
203
+ case EOF_A: break;
204
+ case EOF_B: break;
205
+ }
206
+ return ITER_END;
207
+ }
208
+
209
+ // Returns a new set containing all members shared by SET_A and SET_B.
210
+ static VALUE
211
+ method_intersection(VALUE self, VALUE set_a, VALUE set_b) {
212
+ return parallel_build(set_a, set_b, add_shared_to_hash);
213
+ }
214
+
215
+ static enum iter_action
216
+ add_any_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
217
+ VALUE *memo = (VALUE*)*memp;
218
+
219
+ switch(state) {
220
+ case A_LT_B:
221
+ if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
222
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
223
+ return ITER_ADVANCE_B;
224
+ }
225
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
226
+ return ITER_ADVANCE_A;
227
+ case A_EQ_B:
228
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
229
+ return ITER_ADVANCE_BOTH; // shared member
230
+ case A_GT_B:
231
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
232
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
233
+ return ITER_ADVANCE_A;
234
+ }
235
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
236
+ return ITER_ADVANCE_B;
237
+ case EOF_A:
238
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
239
+ MEMO_SET_A_DEPLETED = 1;
240
+ if (MEMO_SET_B_DEPLETED) break; // break if both sets depleted
241
+ return ITER_ADVANCE_B;
242
+ case EOF_B:
243
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
244
+ MEMO_SET_B_DEPLETED = 1;
245
+ if (MEMO_SET_A_DEPLETED) break; // break if both sets depleted
246
+ return ITER_ADVANCE_A;
247
+ }
248
+ return ITER_END;
249
+ }
250
+
251
+ // Returns a new set that includes all members of SET_A and/or SET_B.
252
+ static VALUE
253
+ method_union(VALUE self, VALUE set_a, VALUE set_b) {
254
+ return parallel_build(set_a, set_b, add_any_members_to_hash);
255
+ }
256
+
257
+ #define INSERT_UNLESS_EQUAL(val, other, hsh) \
258
+ if (compare_any_values(val, other)) { st_insert(RHASH_TBL(hsh), val, Qtrue); }
259
+
260
+ static enum iter_action
261
+ add_nonb_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
262
+ VALUE *memo = (VALUE*)*memp;
263
+
264
+ switch(state) {
265
+ case A_LT_B:
266
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
267
+ return ITER_ADVANCE_A;
268
+ case A_EQ_B:
269
+ return ITER_ADVANCE_BOTH; // shared member
270
+ case A_GT_B:
271
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
272
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
273
+ return ITER_ADVANCE_A;
274
+ }
275
+ return ITER_ADVANCE_B;
276
+ case EOF_A:
277
+ // if set b is also depleted, add a unless equal to final b
278
+ if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); }
279
+ break;
280
+ case EOF_B:
281
+ MEMO_SET_B_DEPLETED = 1;
282
+ return ITER_ADVANCE_A;
283
+ }
284
+ return ITER_END;
285
+ }
286
+
287
+ // Returns a new set with all members of SET_A that are not in SET_B.
288
+ static VALUE
289
+ method_difference(VALUE self, VALUE set_a, VALUE set_b) {
290
+ return parallel_build(set_a, set_b, add_nonb_members_to_hash);
291
+ }
292
+
293
+ static enum iter_action
294
+ add_xor_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
295
+ VALUE *memo = (VALUE*)*memp;
296
+
297
+ switch(state) {
298
+ case A_LT_B:
299
+ if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
300
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
301
+ return ITER_ADVANCE_B;
302
+ }
303
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
304
+ return ITER_ADVANCE_A;
305
+ case A_EQ_B:
306
+ return ITER_ADVANCE_BOTH; // shared member, skip
307
+ case A_GT_B:
308
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
309
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
310
+ return ITER_ADVANCE_A;
311
+ }
312
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
313
+ return ITER_ADVANCE_B;
314
+ case EOF_A:
315
+ // if set b is also depleted, add a unless equal to final b and break
316
+ if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); break; }
317
+ INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); // add b unless equal to final a
318
+ MEMO_SET_A_DEPLETED = 1; // mark set a as depleted
319
+ return ITER_ADVANCE_B;
320
+ case EOF_B:
321
+ // if set a is also depleted, add b unless equal to final a and break
322
+ if (MEMO_SET_A_DEPLETED) { INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); break; }
323
+ INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); // add a unless equal to final b
324
+ MEMO_SET_B_DEPLETED = 1; // mark set b as depleted
325
+ return ITER_ADVANCE_A;
326
+ }
327
+ return ITER_END;
328
+ }
329
+
330
+ // Returns a new set containing the symmetric difference (XOR) of SET_A and SET_B.
331
+ static VALUE
332
+ method_exclusion(VALUE self, VALUE set_a, VALUE set_b) {
333
+ return parallel_build(set_a, set_b, add_xor_members_to_hash);
334
+ }
335
+
336
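+ // A Fixnum VALUE is the integer tagged as (n << 1) | 1, so stepping the VALUE
+ // by 2 steps the underlying integer by 1 without unboxing.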
+ #define INCR_FIXNUM_ID(id) (id += 2)
337
+ #define DECR_FIXNUM_ID(id) (id -= 2)
338
+
339
+ #define GET_RANGE_FIXNUM_IDS(range, from_id, upto_id) \
340
+ int excl; \
341
+ if (!rb_range_values(range, &from_id, &upto_id, &excl)) { \
342
+ rb_raise(rb_eArgError, "Pass a Range"); \
343
+ } \
344
+ if (excl) DECR_FIXNUM_ID(upto_id); \
345
+ Check_Type(from_id, T_FIXNUM); \
346
+ Check_Type(upto_id, T_FIXNUM);
347
+
348
+ // Fills HASH with all Fixnums in RANGE.
349
+ static VALUE
350
+ method_fill_with_fixnums(VALUE self, VALUE hash, VALUE range) {
351
+ VALUE from_id, upto_id;
352
+ st_table *tbl;
353
+
354
+ GET_RANGE_FIXNUM_IDS(range, from_id, upto_id);
355
+ tbl = RHASH_TBL(hash);
356
+
357
+ while (from_id <= upto_id) {
358
+ st_insert(tbl, from_id, Qtrue);
359
+ INCR_FIXNUM_ID(from_id);
360
+ }
361
+
362
+ return upto_id;
363
+ }
364
+
365
+ inline static void
366
+ insert_fixnum_id(st_table *tbl, VALUE id, int ucp_only) {
367
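+ // in ucp mode (presumably "Unicode codepoints"), skip tagged IDs strictly between
+ // 0x1B000 and 0x1C000, i.e. codepoints 0xD800..0xDFFF (the UTF-16 surrogate range)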
+ if (!ucp_only || id <= 0x1B000 || id >= 0x1C000) {
368
+ st_insert(tbl, id, Qtrue);
369
+ }
370
+ }
371
+
372
+ // Returns a new set containing all Fixnums in RANGE that are not in SET (the inversion of SET within RANGE).
373
+ static VALUE
374
+ method_invert_fixnum_set(VALUE self, VALUE set, VALUE range, VALUE ucp) {
375
+ VALUE fixnum_id, upto_id, new_hash, new_set, entry;
376
+ st_index_t size, i;
377
+ int ucp_only;
378
+ st_table *new_tbl;
379
+ struct LOC_st_stable_entry *entries;
380
+
381
+ GET_RANGE_FIXNUM_IDS(range, fixnum_id, upto_id);
382
+ ucp_only = ucp != Qfalse && ucp != Qnil && ucp != Qundef;
383
+
384
+ // get set members
385
+ entries = set_entries_ptr(set, &size);
386
+
387
+ // prepare new Set
388
+ new_set = rb_class_new_instance(0, 0, RBASIC(set)->klass);
389
+ new_hash = rb_hash_new();
390
+ new_tbl = RHASH_TBL(new_hash);
391
+ rb_iv_set(new_set, "@hash", new_hash);
392
+
393
+ if (size) {
394
+ i = 0;
395
+ entry = entries[i].key;
396
+
397
+ // here is the optimization: skipping unneeded comparisons with lower values
398
+ for (;;) {
399
+ if (fixnum_id == entry) {
400
+ // fixnum_id is in set, compare next fixnum with next set member
401
+ INCR_FIXNUM_ID(fixnum_id);
402
+ if (++i == size || fixnum_id > upto_id) break;
403
+ entry = entries[i].key;
404
+ }
405
+ else if (fixnum_id < entry) {
406
+ // fixnum_id is not in set, include in inversion
407
+ insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
408
+ INCR_FIXNUM_ID(fixnum_id);
409
+ if (fixnum_id > upto_id) break;
410
+ }
411
+ else /* if (fixnum_id > entry) */ {
412
+ // gap; fixnum_id might be in set, check next set member
413
+ if (++i == size) break;
414
+ entry = entries[i].key;
415
+ }
416
+ }
417
+ }
418
+
419
+ // include all fixnums beyond the range of the set
420
+ while (fixnum_id <= upto_id) {
421
+ insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
422
+ INCR_FIXNUM_ID(fixnum_id);
423
+ }
424
+
425
+ set_max_ivar_for_set(new_set);
426
+ rb_obj_freeze(new_hash);
427
+
428
+ return new_set;
429
+ }
430
+
431
+ void Init_immutable_set() {
432
+ VALUE mod;
433
+ mod = rb_define_module("ImmutableSetExt");
434
+ rb_define_singleton_method(mod, "difference", method_difference, 2);
435
+ rb_define_singleton_method(mod, "exclusion", method_exclusion, 2);
436
+ rb_define_singleton_method(mod, "fill_with_fixnums", method_fill_with_fixnums, 2);
437
+ rb_define_singleton_method(mod, "intersect?", method_intersect_p, 2);
438
+ rb_define_singleton_method(mod, "intersection", method_intersection, 2);
439
+ rb_define_singleton_method(mod, "invert_fixnum_set", method_invert_fixnum_set, 3);
440
+ rb_define_singleton_method(mod, "subset?", method_subset_p, 2);
441
+ rb_define_singleton_method(mod, "superset?", method_superset_p, 2);
442
+ rb_define_singleton_method(mod, "union", method_union, 2);
443
+ }
444
+
445
+ #endif // end of #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES ... #else ...