immutable_set 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +31 -0
- data/.rspec +3 -0
- data/.travis.yml +10 -0
- data/BENCHMARK.md +131 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +83 -0
- data/Rakefile +85 -0
- data/bin/console +18 -0
- data/bin/setup +8 -0
- data/ext/immutable_set/extconf.rb +7 -0
- data/ext/immutable_set/immutable_set.c +445 -0
- data/immutable_set.gemspec +33 -0
- data/lib/immutable_set.rb +50 -0
- data/lib/immutable_set/builder_methods.rb +60 -0
- data/lib/immutable_set/disable_mutating_methods.rb +12 -0
- data/lib/immutable_set/inversion.rb +13 -0
- data/lib/immutable_set/native_ext.rb +19 -0
- data/lib/immutable_set/pure.rb +5 -0
- data/lib/immutable_set/ruby_fallback.rb +148 -0
- data/lib/immutable_set/stdlib_set_method_overrides.rb +155 -0
- data/lib/immutable_set/version.rb +3 -0
- metadata +137 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 142c33ab1a414a70f1d310c511174e4ca1491c25b73062057595b93412f4be54
|
4
|
+
data.tar.gz: 8b2bf71c740c2c1273c33524a0d9b5465a3bbfdc373079dd70025ec4da4a3804
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a1f80fe0c093925ed2012facf812be01d065e164199adb4d03e15ae311ab3c34d0a705fc86d90b9d187b9f3519bf3076e65199e566ea6f7b9fdd6b7c0f647871
|
7
|
+
data.tar.gz: 59628f65aad7609e958b1f8fa444b5d855dc87a66d5801ef74b099bb34dd76bcf9ce3897bcead01e5f0d73c38abe7c10f37da5498487bb7d37abb66e100481f8
|
data/.gitignore
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
*.bundle
|
2
|
+
*.gem
|
3
|
+
*.iml
|
4
|
+
*.stTheme.cache
|
5
|
+
*.sublime-project
|
6
|
+
*.sublime-workspace
|
7
|
+
*.swp
|
8
|
+
*.tmlanguage.cache
|
9
|
+
*.tmPreferences.cache
|
10
|
+
*~
|
11
|
+
.byebug_history
|
12
|
+
.DS_Store
|
13
|
+
.idea/
|
14
|
+
.ruby-gemset
|
15
|
+
.ruby-version
|
16
|
+
.tags
|
17
|
+
.tags1
|
18
|
+
bbin/
|
19
|
+
binstubs/*
|
20
|
+
bundler_stubs/*/.yardoc
|
21
|
+
Gemfile.lock
|
22
|
+
/.bundle/
|
23
|
+
/_yardoc/
|
24
|
+
/coverage/
|
25
|
+
/doc/
|
26
|
+
/pkg/
|
27
|
+
/spec/reports/
|
28
|
+
/tmp/
|
29
|
+
|
30
|
+
# rspec failure tracking
|
31
|
+
.rspec_status
|
data/.rspec
ADDED
data/.travis.yml
ADDED
data/BENCHMARK.md
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
Results of `rake:benchmark` on ruby 2.5.1p57 (2018-03-29 revision 63029) [x86_64-darwin17]
|
2
|
+
|
3
|
+
Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. If the `rbtree` gem is present, `SortedSet` will [use it](https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724) and become even slower.
|
4
|
+
|
5
|
+
```
|
6
|
+
#- with 5M overlapping items
|
7
|
+
gem: 6.6 i/s
|
8
|
+
gem w/o c: 0.8 i/s - 7.85x slower
|
9
|
+
stdlib: 0.7 i/s - 9.51x slower
```
|
10
|
+
```
|
11
|
+
#- with 5M distinct items
|
12
|
+
gem: 1429392.7 i/s
|
13
|
+
gem w/o c: 1414260.7 i/s - same-ish
|
14
|
+
stdlib: 1.0 i/s - 1456728.62x slower
```
|
15
|
+
```
|
16
|
+
#^ with 5M overlapping items
|
17
|
+
gem: 0.9 i/s
|
18
|
+
gem w/o C: 0.4 i/s - 2.12x slower
|
19
|
+
stdlib: 0.4 i/s - 2.16x slower
|
20
|
+
```
|
21
|
+
```
|
22
|
+
#^ with 5M distinct items
|
23
|
+
gem w/o C: 0.8 i/s
|
24
|
+
gem: 0.6 i/s - 1.25x slower
|
25
|
+
stdlib: 0.5 i/s - 1.65x slower
|
26
|
+
```
|
27
|
+
```
|
28
|
+
#intersect? with 5M intersecting items
|
29
|
+
gem: 266.8 i/s
|
30
|
+
gem w/o C: 8.2 i/s - 32.53x slower
|
31
|
+
stdlib: 2.2 i/s - 121.88x slower
|
32
|
+
```
|
33
|
+
```
|
34
|
+
#intersect? with 5M sparse items (rare case?)
|
35
|
+
gem w/o C: 1442.5 i/s
|
36
|
+
gem: 185.2 i/s - 7.79x slower
|
37
|
+
stdlib: 2.0 i/s - 712.75x slower
|
38
|
+
```
|
39
|
+
```
|
40
|
+
#intersect? with 5M distinct items
|
41
|
+
gem: 1376038.3 i/s
|
42
|
+
gem w/o C: 1375048.5 i/s - same-ish
|
43
|
+
stdlib: 2.0 i/s - 675307.67x slower
|
44
|
+
```
|
45
|
+
```
|
46
|
+
#& with 5M intersecting items
|
47
|
+
gem: 6.4 i/s
|
48
|
+
gem w/o C: 2.6 i/s - 2.49x slower
|
49
|
+
Array#&: 1.3 i/s - 4.83x slower
|
50
|
+
stdlib: 0.9 i/s - 6.90x slower
|
51
|
+
```
|
52
|
+
```
|
53
|
+
#& with 5M sparse items (rare case?)
|
54
|
+
gem: 88.3 i/s
|
55
|
+
gem w/o C: 19.6 i/s - 4.50x slower
|
56
|
+
stdlib: 2.0 i/s - 44.46x slower
|
57
|
+
Array#&: 1.8 i/s - 49.61x slower
|
58
|
+
```
|
59
|
+
```
|
60
|
+
#& with 5M distinct items
|
61
|
+
gem w/o C: 578891.9 i/s
|
62
|
+
gem: 571604.2 i/s - same-ish
|
63
|
+
stdlib: 2.1 i/s - 281016.75x slower
|
64
|
+
Array#&: 1.8 i/s - 316493.80x slower
|
65
|
+
```
|
66
|
+
```
|
67
|
+
#inversion with 5M items
|
68
|
+
gem: 1.8 i/s
|
69
|
+
gem w/o C: 0.7 i/s - 2.58x slower
|
70
|
+
stdlib #-: 0.3 i/s - 6.67x slower
|
71
|
+
```
|
72
|
+
```
|
73
|
+
#inversion with 100k items
|
74
|
+
gem: 239.5 i/s
|
75
|
+
gem w/o C: 62.8 i/s - 3.81x slower
|
76
|
+
stdlib #-: 29.2 i/s - 8.22x slower
|
77
|
+
```
|
78
|
+
```
|
79
|
+
#minmax with 10M items
|
80
|
+
gem: 3180102.2 i/s
|
81
|
+
gem w/o C: 3170355.3 i/s - same-ish
|
82
|
+
stdlib: 5.3 i/s - 595743.46x slower
|
83
|
+
```
|
84
|
+
```
|
85
|
+
#minmax with 1M items
|
86
|
+
gem: 3247178.7 i/s
|
87
|
+
gem w/o C: 3231669.0 i/s - same-ish
|
88
|
+
stdlib: 52.8 i/s - 61535.19x slower
|
89
|
+
```
|
90
|
+
```
|
91
|
+
::new with 5M Range items
|
92
|
+
gem: 0.8 i/s
|
93
|
+
gem w/o C: 0.6 i/s - 1.27x slower
|
94
|
+
stdlib: 0.4 i/s - 1.78x slower
|
95
|
+
```
|
96
|
+
```
|
97
|
+
::new with 100k Range items
|
98
|
+
gem: 126.7 i/s
|
99
|
+
gem w/o C: 69.2 i/s - 1.83x slower
|
100
|
+
stdlib: 33.1 i/s - 3.83x slower
|
101
|
+
```
|
102
|
+
```
|
103
|
+
::new with 10k Range items in 10 non-continuous Ranges
|
104
|
+
gem: 3117.6 i/s
|
105
|
+
gem w/o C: 1326.2 i/s - 2.35x slower
|
106
|
+
stdlib: 666.7 i/s - 4.68x slower
|
107
|
+
```
|
108
|
+
```
|
109
|
+
#(proper_)subset/superset? with 5M subset items
|
110
|
+
gem: 50.8 i/s
|
111
|
+
gem w/o C: 1.4 i/s - 37.61x slower
|
112
|
+
stdlib: 1.3 i/s - 37.71x slower
|
113
|
+
```
|
114
|
+
```
|
115
|
+
#(proper_)subset/superset? with 5M overlapping items
|
116
|
+
gem: 51.0 i/s
|
117
|
+
gem w/o C: 1.4 i/s - 36.49x slower
|
118
|
+
stdlib: 1.4 i/s - 36.74x slower
|
119
|
+
```
|
120
|
+
```
|
121
|
+
#(proper_)subset/superset? with 100k overlapping items
|
122
|
+
gem: 3238.3 i/s
|
123
|
+
stdlib: 302.9 i/s - 10.69x slower
|
124
|
+
gem w/o C: 281.8 i/s - 11.49x slower
|
125
|
+
```
|
126
|
+
```
|
127
|
+
#+ with 5M overlapping items
|
128
|
+
gem: 1.4 i/s
|
129
|
+
stdlib: 1.2 i/s - 1.19x slower
|
130
|
+
gem w/o C: 0.9 i/s - 1.49x slower
|
131
|
+
```
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Jannosch Müller
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/immutable_set.svg)](http://badge.fury.io/rb/immutable_set)
|
2
|
+
[![Build Status](https://travis-ci.org/janosch-x/immutable_set.svg?branch=master)](https://travis-ci.org/janosch-x/immutable_set)
|
3
|
+
|
4
|
+
# ImmutableSet
|
5
|
+
|
6
|
+
A faster, immutable replacement for Ruby's [`Set`](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html).
|
7
|
+
|
8
|
+
On Ruby >= 2.4, all operations are faster, some by several orders of magnitude (see [benchmarks](./BENCHMARK.md)).
|
9
|
+
|
10
|
+
#### Useful for ...
|
11
|
+
|
12
|
+
- creating and working with large sorted sets
|
13
|
+
- intersecting, merging, diffing, checking for subsets etc.
|
14
|
+
- the [advantages of immutability](https://hackernoon.com/f98e7e85b6ac)
|
15
|
+
|
16
|
+
#### Not useful for ...
|
17
|
+
|
18
|
+
- small sets and other cases where performance does not matter
|
19
|
+
- sets with mixed members or any members that are not mutually comparable
|
20
|
+
- doing a lot of adding, removing, and checking of single items
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
require 'immutable_set'
|
26
|
+
|
27
|
+
class MySet < ImmutableSet; end
|
28
|
+
```
|
29
|
+
|
30
|
+
Mutating methods of `Set` (e.g. `#add`, `#delete`) are removed. They can be substituted like this if needed:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
set1 = MySet[1, 2, 3]
|
34
|
+
set1 += MySet[4] # => MySet[1, 2, 3, 4]
|
35
|
+
set1 -= MySet[3] # => MySet[1, 2, 4]
|
36
|
+
```
|
37
|
+
|
38
|
+
Immutability is required for most of the [performance optimizations](#performance-optimizations).
|
39
|
+
|
40
|
+
All other methods behave as in `Set`/`SortedSet`, so see the [official documentation](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html) for details about what they do.
|
41
|
+
|
42
|
+
## New methods
|
43
|
+
|
44
|
+
**#distinct_bounds?**
|
45
|
+
|
46
|
+
Returns true iff the passed set is beyond the `#minmax` boundaries of `self`.
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
MySet[2, 4].distinct_bounds?(MySet[3]) # => false
|
50
|
+
MySet[2, 4].distinct_bounds?(MySet[5]) # => true
|
51
|
+
```
|
52
|
+
|
53
|
+
**::from_ranges**
|
54
|
+
|
55
|
+
Returns a set built from all passed `Ranges`.
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
MySet.from_ranges(2..4, 6..8) # => MySet[2, 3, 4, 6, 7, 8]
|
59
|
+
```
|
60
|
+
|
61
|
+
**#inversion**
|
62
|
+
|
63
|
+
Returns a new set of the same class, containing all members `from`..`upto` that are not in `self`. Faster than `Set.new(from..upto) - self`.
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
MySet[3, 5].inversion(from: 1, upto: 4) # => MySet[1, 2, 4]
|
67
|
+
MySet['c'].inversion(from: 'a', upto: 'd') # => MySet['a', 'b', 'd']
|
68
|
+
```
|
69
|
+
|
70
|
+
## Performance optimizations
|
71
|
+
|
72
|
+
The cost of many methods is reduced from O(m*n) to O(m+n) or better. The underlying ideas are:
|
73
|
+
|
74
|
+
- never needing to sort, because the internal `@hash` is built in order and then frozen
|
75
|
+
- remembering `#max` cheaply whenever possible
|
76
|
+
- this allows skipping unneeded checks for members outside the own `#minmax` boundaries
|
77
|
+
- avoiding unneeded lookups during comparisons by iterating over both sets in parallel in C
|
78
|
+
- parallel iteration can skip over gaps in either set since both hashes are ordered
|
79
|
+
- when using Ruby, preferring `#while` over slower, scope-building iteration methods
|
80
|
+
|
81
|
+
## Benchmarks
|
82
|
+
|
83
|
+
Run `rake benchmark` or see [BENCHMARK.md](./BENCHMARK.md).
|
data/Rakefile
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
|
4
|
+
# `rake spec` runs the RSpec suite; it is also the default task.
RSpec::Core::RakeTask.new(:spec)

task default: :spec

require 'rake/extensiontask'

# `rake compile` builds the C extension and places it in lib/immutable_set.
Rake::ExtensionTask.new('immutable_set') { |ext| ext.lib_dir = 'lib/immutable_set' }
|
13
|
+
|
14
|
+
desc 'Download relevant ruby/spec tests, adapt to ImmutableSet and its variants'
task :sync_ruby_spec do
  require 'fileutils'

  variants = {
    'ImmutableSet'       => './spec/ruby-spec/library/immutable_set',
    'ImmutableSet::Pure' => './spec/ruby-spec/library/immutable_set_pure',
  }
  # The specs are exported into the first variant's directory and copied
  # from there into the directories of all other variants.
  base_dir = variants.first[1]

  # start from scratch: drop previously synced copies
  variants.each_value { |dir| FileUtils.rm_rf(dir) if File.exist?(dir) }

  `svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{base_dir}`

  variants.each_value { |dir| FileUtils.copy_entry(base_dir, dir) unless dir == base_dir }

  variants.each.with_index do |(class_name, dir), i|
    Dir["#{dir}/**/*.rb"].each do |spec|
      # specs whose file name matches one of these words are deleted
      if spec =~ %r{/(add|append|case|clear|collect|delete|filter|flatten|
                      initialize|keep_if|map|merge|replace|reject|select|subtract)}x
        File.delete(spec)
        next
      end

      # `i` must be added to shared example names or they'll override each other
      adapted_content =
        File
        .read(spec)
        .gsub('SortedSet', class_name)
        .gsub('sorted_set_', "sorted_set_#{i}_")
        .gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |method|')
        .gsub('@method', 'method')
        .gsub(/be_(false|true)/, 'be \1')
        .gsub('mock', 'double')

      File.open(spec, 'w') { |f| f.puts adapted_content }
    end
  end
end
|
52
|
+
|
53
|
+
desc 'Run all IPS benchmarks'
task :benchmark do
  # each benchmark file runs as a side effect of being required
  benchmark_files = Dir['./benchmarks/*.rb'].sort
  benchmark_files.each { |file| require file }
end
|
57
|
+
|
58
|
+
namespace :benchmark do
  desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
  task :write_to_file do
    # Filled with caption => comparison output pairs while the benchmarks run.
    $store_comparison_results = {}

    Rake.application[:benchmark].invoke

    File.open('BENCHMARK.md', 'w') do |f|
      # NOTE: the argument list must end here (no trailing comma). Otherwise
      # the `each` call below becomes one more argument of `puts`, its
      # `do ... end` block binds to `puts`, and no result is ever written.
      f.puts "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}",
             '',
             'Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. '\
             'If the `rbtree` gem is present, `SortedSet` will [use it]'\
             '(https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724)'\
             ' and become even slower.',
             ''

      # one fenced block per benchmark: caption, then the comparison lines
      # (the first line of each result is dropped)
      $store_comparison_results.each do |caption, result|
        f.puts '```', caption, result.strip.gsub(/(same-ish).*$/, '\1').lines[1..-1], '```'
      end
    end
  end
end
|
80
|
+
|
81
|
+
# Recompile the extension before benchmarking or running specs,
# except on platforms whose name matches /java/.
if RUBY_PLATFORM !~ /java/
  task(:benchmark).enhance([:compile])
  task(:spec).enhance([:compile])
end
|
data/bin/console
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'bundler/setup'
require 'immutable_set'

# Fixtures and initialization code can be added here to make experimenting
# with the gem easier. A different console may be used as well.

# (To use pry instead, add it to the Gemfile first!)
# require "pry"
# Pry.start

# Short-named subclasses for quick comparisons in the console.
class S < SortedSet; end
class I < ImmutableSet; end
class P < ImmutableSet::Pure; end

require "irb"
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/ext/immutable_set/extconf.rb
ADDED
data/ext/immutable_set/immutable_set.c
ADDED
@@ -0,0 +1,445 @@
|
|
1
|
+
// This speeds up set methods that take an enum by iterating both in parallel.
|
2
|
+
// Simple sanity checks and casting the arg to set are done in Ruby beforehand.
|
3
|
+
// Internal hashes of the recipient and arg must have been created in order.
|
4
|
+
|
5
|
+
#ifndef HAVE_STRUCT_ST_TABLE_ENTRIES
|
6
|
+
// HAVE_STRUCT_ST_TABLE_ENTRIES is not defined: the optional extension does
// not work for ruby < 2.4, so the initializer is empty and no module is defined.
void Init_immutable_set() {}
|
8
|
+
#else
|
9
|
+
|
10
|
+
#include "ruby.h"
|
11
|
+
#include "ruby/st.h"
|
12
|
+
|
13
|
+
enum iter_state {A_LT_B = -1, A_EQ_B = 0, A_GT_B = 1, EOF_A = -2, EOF_B = -3};
|
14
|
+
enum iter_action {ITER_ADVANCE_A, ITER_ADVANCE_B, ITER_ADVANCE_BOTH, ITER_END};
|
15
|
+
|
16
|
+
typedef enum iter_state(*compare_function)(VALUE, VALUE);
|
17
|
+
typedef enum iter_action(*comp_callback)(enum iter_state, VALUE*);
|
18
|
+
typedef enum iter_action(*proc_callback)(enum iter_state, VALUE*, VALUE, VALUE);
|
19
|
+
|
20
|
+
static enum iter_state
|
21
|
+
compare_fixnum_values(VALUE a, VALUE b) {
|
22
|
+
if (a < b) return A_LT_B;
|
23
|
+
if (a > b) return A_GT_B;
|
24
|
+
return A_EQ_B;
|
25
|
+
}
|
26
|
+
|
27
|
+
static enum iter_state
|
28
|
+
compare_any_values(VALUE a, VALUE b) {
|
29
|
+
return rb_cmpint(rb_funcallv(a, rb_intern("<=>"), 1, &b), a, b);
|
30
|
+
}
|
31
|
+
|
32
|
+
#ifndef STRING_P
|
33
|
+
# define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
|
34
|
+
#endif
|
35
|
+
|
36
|
+
static compare_function
|
37
|
+
optimal_compare_function(VALUE set_a, VALUE set_b) {
|
38
|
+
VALUE max_a, max_b;
|
39
|
+
|
40
|
+
max_a = rb_iv_get(set_a, "@max");
|
41
|
+
max_b = rb_iv_get(set_b, "@max");
|
42
|
+
|
43
|
+
if (FIXNUM_P(max_a) && FIXNUM_P(max_b)) return compare_fixnum_values;
|
44
|
+
if (STRING_P(max_a) && STRING_P(max_b)) return rb_str_cmp;
|
45
|
+
return compare_any_values;
|
46
|
+
}
|
47
|
+
|
48
|
+
struct LOC_st_stable_entry {
|
49
|
+
st_index_t hash;
|
50
|
+
st_data_t key;
|
51
|
+
st_data_t record;
|
52
|
+
};
|
53
|
+
|
54
|
+
static struct LOC_st_stable_entry*
|
55
|
+
set_entries_ptr(VALUE set, st_index_t* size_ptr) {
|
56
|
+
VALUE hash;
|
57
|
+
|
58
|
+
hash = rb_iv_get(set, "@hash");
|
59
|
+
*size_ptr = RHASH_SIZE(hash);
|
60
|
+
|
61
|
+
return (struct LOC_st_stable_entry*)RHASH_TBL(hash)->entries;
|
62
|
+
}
|
63
|
+
|
64
|
+
/*
 * Function body shared by parallel_compare and parallel_process. It expects
 * `set_a`, `set_b`, `callback` and `memo` to be in scope and passes the
 * current state plus the macro arguments to the callback.
 *
 * Returns `memo` right away if either set is empty. Otherwise the current
 * members of both sets are compared, and the callback decides which set to
 * advance. When a set runs out of members, the callback is invoked again
 * with EOF_A or EOF_B while entry_a / entry_b keep their last values.
 * The iteration returns `memo` once the callback answers ITER_END.
 */
#define PARALLEL_ITERATE(...) \
  st_index_t size_a, size_b, i, j; \
  compare_function compare_func; \
  enum iter_state state; \
  struct LOC_st_stable_entry *entries_a, *entries_b; \
  VALUE entry_a, entry_b; \
  \
  entries_a = set_entries_ptr(set_a, &size_a); \
  entries_b = set_entries_ptr(set_b, &size_b); \
  if (!size_a || !size_b) return memo; \
  \
  i = j = 0; \
  entry_a = entries_a[i].key; \
  entry_b = entries_b[j].key; \
  compare_func = optimal_compare_function(set_a, set_b); \
  \
  for (;;) { \
    state = (*compare_func)(entry_a, entry_b); \
    \
    eval_state: /* re-entered with EOF_A / EOF_B, skipping the comparison */ \
    switch((*callback)(state, __VA_ARGS__)) { \
      case ITER_ADVANCE_A: \
        if (++i >= size_a) { state = EOF_A; goto eval_state; } \
        entry_a = entries_a[i].key; \
        continue; \
      case ITER_ADVANCE_B: \
        if (++j >= size_b) { state = EOF_B; goto eval_state; } \
        entry_b = entries_b[j].key; \
        continue; \
      case ITER_ADVANCE_BOTH: \
        /* if set a ends here, set b is not advanced */ \
        if (++i >= size_a) { state = EOF_A; goto eval_state; } \
        entry_a = entries_a[i].key; \
        if (++j >= size_b) { state = EOF_B; goto eval_state; } \
        entry_b = entries_b[j].key; \
        continue; \
      case ITER_END: \
        return memo; \
    } \
  }
|
103
|
+
|
104
|
+
static VALUE
|
105
|
+
parallel_compare(VALUE set_a, VALUE set_b, comp_callback callback, VALUE memo) {
|
106
|
+
PARALLEL_ITERATE(&memo);
|
107
|
+
}
|
108
|
+
|
109
|
+
static VALUE
|
110
|
+
parallel_process(VALUE set_a, VALUE set_b, proc_callback callback, VALUE memo) {
|
111
|
+
PARALLEL_ITERATE(&memo, entry_a, entry_b);
|
112
|
+
}
|
113
|
+
|
114
|
+
static enum iter_action
|
115
|
+
check_first_subset_of_second(enum iter_state state, VALUE* memo) {
|
116
|
+
switch(state) {
|
117
|
+
case A_LT_B: *memo = Qfalse; break; // entry_a not in set_b
|
118
|
+
case A_EQ_B: return ITER_ADVANCE_BOTH;
|
119
|
+
case A_GT_B: return ITER_ADVANCE_B;
|
120
|
+
case EOF_A: *memo = Qtrue; break; // checked all in set_a
|
121
|
+
case EOF_B: *memo = Qfalse; break; // no more comparandi in set_b
|
122
|
+
}
|
123
|
+
return ITER_END;
|
124
|
+
}
|
125
|
+
|
126
|
+
// Returns Qtrue if SET_A is a subset (proper or not) of SET_B, else Qfalse.
|
127
|
+
static VALUE
|
128
|
+
method_subset_p(VALUE self, VALUE set_a, VALUE set_b) {
|
129
|
+
return parallel_compare(set_a, set_b, check_first_subset_of_second, Qfalse);
|
130
|
+
}
|
131
|
+
|
132
|
+
// Returns Qtrue if SET_A is a superset (proper or not) of SET_B, else Qfalse.
|
133
|
+
static VALUE
|
134
|
+
method_superset_p(VALUE self, VALUE set_a, VALUE set_b) {
|
135
|
+
return parallel_compare(set_b, set_a, check_first_subset_of_second, Qfalse);
|
136
|
+
}
|
137
|
+
|
138
|
+
// TODO: if (a > b max || b > a max) *memo = Qfalse; break; ?
|
139
|
+
static enum iter_action
|
140
|
+
check_if_intersect(enum iter_state state, VALUE* memo) {
|
141
|
+
switch(state) {
|
142
|
+
case A_LT_B: return ITER_ADVANCE_A;
|
143
|
+
case A_EQ_B: *memo = Qtrue; break; // found common member
|
144
|
+
case A_GT_B: return ITER_ADVANCE_B;
|
145
|
+
case EOF_A: *memo = Qfalse; break;
|
146
|
+
case EOF_B: *memo = Qfalse; break;
|
147
|
+
}
|
148
|
+
return ITER_END;
|
149
|
+
}
|
150
|
+
|
151
|
+
// Returns Qtrue if SET_A intersects with SET_B, else Qfalse.
|
152
|
+
static VALUE
|
153
|
+
method_intersect_p(VALUE self, VALUE set_a, VALUE set_b) {
|
154
|
+
return parallel_compare(set_a, set_b, check_if_intersect, Qfalse);
|
155
|
+
}
|
156
|
+
|
157
|
+
static void
|
158
|
+
set_max_ivar_for_set(VALUE set) {
|
159
|
+
struct LOC_st_stable_entry *entries;
|
160
|
+
st_index_t size;
|
161
|
+
|
162
|
+
entries = set_entries_ptr(set, &size);
|
163
|
+
if (size) rb_iv_set(set, "@max", entries[size - 1].key);
|
164
|
+
}
|
165
|
+
|
166
|
+
#define MEMO_HASH (memo[0])
|
167
|
+
#define MEMO_SET_A_DEPLETED (memo[1])
|
168
|
+
#define MEMO_SET_B_DEPLETED (memo[2])
|
169
|
+
|
170
|
+
// helper to process two sets and build a new one in parallel
|
171
|
+
static VALUE
|
172
|
+
parallel_build(VALUE set_a, VALUE set_b, proc_callback proc) {
|
173
|
+
VALUE new_set, new_hash, memo[3];
|
174
|
+
|
175
|
+
// prepare new Set
|
176
|
+
new_set = rb_class_new_instance(0, 0, RBASIC(set_a)->klass);
|
177
|
+
new_hash = rb_hash_new();
|
178
|
+
rb_iv_set(new_set, "@hash", new_hash);
|
179
|
+
|
180
|
+
MEMO_HASH = new_hash;
|
181
|
+
MEMO_SET_A_DEPLETED = 0;
|
182
|
+
MEMO_SET_B_DEPLETED = 0;
|
183
|
+
|
184
|
+
parallel_process(set_a, set_b, proc, (VALUE)memo);
|
185
|
+
|
186
|
+
set_max_ivar_for_set(new_set);
|
187
|
+
rb_obj_freeze(new_hash);
|
188
|
+
|
189
|
+
return new_set;
|
190
|
+
}
|
191
|
+
|
192
|
+
static enum iter_action
|
193
|
+
add_shared_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
|
194
|
+
VALUE *memo;
|
195
|
+
|
196
|
+
switch(state) {
|
197
|
+
case A_LT_B: return ITER_ADVANCE_A;
|
198
|
+
case A_EQ_B:
|
199
|
+
memo = (VALUE*)*memp;
|
200
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
201
|
+
return ITER_ADVANCE_BOTH;
|
202
|
+
case A_GT_B: return ITER_ADVANCE_B;
|
203
|
+
case EOF_A: break;
|
204
|
+
case EOF_B: break;
|
205
|
+
}
|
206
|
+
return ITER_END;
|
207
|
+
}
|
208
|
+
|
209
|
+
// Returns a new set containing all members shared by SET_A and SET_B.
|
210
|
+
static VALUE
|
211
|
+
method_intersection(VALUE self, VALUE set_a, VALUE set_b) {
|
212
|
+
return parallel_build(set_a, set_b, add_shared_to_hash);
|
213
|
+
}
|
214
|
+
|
215
|
+
static enum iter_action
|
216
|
+
add_any_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
|
217
|
+
VALUE *memo = (VALUE*)*memp;
|
218
|
+
|
219
|
+
switch(state) {
|
220
|
+
case A_LT_B:
|
221
|
+
if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
|
222
|
+
st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
|
223
|
+
return ITER_ADVANCE_B;
|
224
|
+
}
|
225
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
226
|
+
return ITER_ADVANCE_A;
|
227
|
+
case A_EQ_B:
|
228
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
229
|
+
return ITER_ADVANCE_BOTH; // shared member
|
230
|
+
case A_GT_B:
|
231
|
+
if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
|
232
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
233
|
+
return ITER_ADVANCE_A;
|
234
|
+
}
|
235
|
+
st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
|
236
|
+
return ITER_ADVANCE_B;
|
237
|
+
case EOF_A:
|
238
|
+
st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
|
239
|
+
MEMO_SET_A_DEPLETED = 1;
|
240
|
+
if (MEMO_SET_B_DEPLETED) break; // break if both sets depleted
|
241
|
+
return ITER_ADVANCE_B;
|
242
|
+
case EOF_B:
|
243
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
244
|
+
MEMO_SET_B_DEPLETED = 1;
|
245
|
+
if (MEMO_SET_A_DEPLETED) break; // break if both sets depleted
|
246
|
+
return ITER_ADVANCE_A;
|
247
|
+
}
|
248
|
+
return ITER_END;
|
249
|
+
}
|
250
|
+
|
251
|
+
// Returns a new set that includes all members of SET_A and/or SET_B.
|
252
|
+
static VALUE
|
253
|
+
method_union(VALUE self, VALUE set_a, VALUE set_b) {
|
254
|
+
return parallel_build(set_a, set_b, add_any_members_to_hash);
|
255
|
+
}
|
256
|
+
|
257
|
+
// Inserts VAL into the hash HSH unless VAL and OTHER compare as equal
// (using `<=>`, see compare_any_values).
// Wrapped in do/while(0) so that the expansion is a single statement that
// cannot capture an `else` of the calling code; arguments are parenthesized.
#define INSERT_UNLESS_EQUAL(val, other, hsh) \
  do { \
    if (compare_any_values((val), (other))) { \
      st_insert(RHASH_TBL(hsh), (val), Qtrue); \
    } \
  } while (0)
|
259
|
+
|
260
|
+
static enum iter_action
|
261
|
+
add_nonb_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
|
262
|
+
VALUE *memo = (VALUE*)*memp;
|
263
|
+
|
264
|
+
switch(state) {
|
265
|
+
case A_LT_B:
|
266
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
267
|
+
return ITER_ADVANCE_A;
|
268
|
+
case A_EQ_B:
|
269
|
+
return ITER_ADVANCE_BOTH; // shared member
|
270
|
+
case A_GT_B:
|
271
|
+
if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
|
272
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
273
|
+
return ITER_ADVANCE_A;
|
274
|
+
}
|
275
|
+
return ITER_ADVANCE_B;
|
276
|
+
case EOF_A:
|
277
|
+
// if set b is also depleted, add a unless equal to final b
|
278
|
+
if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); }
|
279
|
+
break;
|
280
|
+
case EOF_B:
|
281
|
+
MEMO_SET_B_DEPLETED = 1;
|
282
|
+
return ITER_ADVANCE_A;
|
283
|
+
}
|
284
|
+
return ITER_END;
|
285
|
+
}
|
286
|
+
|
287
|
+
// Returns a new set that includes any member of either passed set.
|
288
|
+
static VALUE
|
289
|
+
method_difference(VALUE self, VALUE set_a, VALUE set_b) {
|
290
|
+
return parallel_build(set_a, set_b, add_nonb_members_to_hash);
|
291
|
+
}
|
292
|
+
|
293
|
+
static enum iter_action
|
294
|
+
add_xor_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
|
295
|
+
VALUE *memo = (VALUE*)*memp;
|
296
|
+
|
297
|
+
switch(state) {
|
298
|
+
case A_LT_B:
|
299
|
+
if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
|
300
|
+
st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
|
301
|
+
return ITER_ADVANCE_B;
|
302
|
+
}
|
303
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
304
|
+
return ITER_ADVANCE_A;
|
305
|
+
case A_EQ_B:
|
306
|
+
return ITER_ADVANCE_BOTH; // shared member, skip
|
307
|
+
case A_GT_B:
|
308
|
+
if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
|
309
|
+
st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
|
310
|
+
return ITER_ADVANCE_A;
|
311
|
+
}
|
312
|
+
st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
|
313
|
+
return ITER_ADVANCE_B;
|
314
|
+
case EOF_A:
|
315
|
+
// if set b is also depleted, add a unless equal to final b and break
|
316
|
+
if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); break; }
|
317
|
+
INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); // add b unless equal to final a
|
318
|
+
MEMO_SET_A_DEPLETED = 1; // mark set a as depleted
|
319
|
+
return ITER_ADVANCE_B;
|
320
|
+
case EOF_B:
|
321
|
+
// if set a is also depleted, add b unless equal to final a and break
|
322
|
+
if (MEMO_SET_A_DEPLETED) { INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); break; }
|
323
|
+
INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); // add a unless equal to final b
|
324
|
+
MEMO_SET_B_DEPLETED = 1; // mark set b as depleted
|
325
|
+
return ITER_ADVANCE_A;
|
326
|
+
}
|
327
|
+
return ITER_END;
|
328
|
+
}
|
329
|
+
|
330
|
+
// Returns a new set that is a XOR result of SET_A and SET_B.
|
331
|
+
static VALUE
|
332
|
+
method_exclusion(VALUE self, VALUE set_a, VALUE set_b) {
|
333
|
+
return parallel_build(set_a, set_b, add_xor_members_to_hash);
|
334
|
+
}
|
335
|
+
|
336
|
+
// A Fixnum n is stored as the tagged VALUE 2n + 1, so adding or subtracting
// 2 moves a Fixnum VALUE to the next or previous integer.
#define INCR_FIXNUM_ID(id) ((id) += 2)
#define DECR_FIXNUM_ID(id) ((id) -= 2)

// Declares `excl` and assigns the boundaries of RANGE to FROM_ID and UPTO_ID,
// with UPTO_ID made inclusive if RANGE excludes its end.
// Raises ArgumentError if RANGE is not a Range, TypeError if a boundary is
// not a Fixnum. Both boundaries are type-checked before UPTO_ID is touched,
// so that arithmetic is never applied to a non-Fixnum VALUE.
#define GET_RANGE_FIXNUM_IDS(range, from_id, upto_id) \
  int excl; \
  if (!rb_range_values((range), &(from_id), &(upto_id), &excl)) { \
    rb_raise(rb_eArgError, "Pass a Range"); \
  } \
  Check_Type((from_id), T_FIXNUM); \
  Check_Type((upto_id), T_FIXNUM); \
  if (excl) DECR_FIXNUM_ID(upto_id);
|
347
|
+
|
348
|
+
// Fills HASH will all Fixnums in RANGE.
|
349
|
+
static VALUE
|
350
|
+
method_fill_with_fixnums(VALUE self, VALUE hash, VALUE range) {
|
351
|
+
VALUE from_id, upto_id;
|
352
|
+
st_table *tbl;
|
353
|
+
|
354
|
+
GET_RANGE_FIXNUM_IDS(range, from_id, upto_id);
|
355
|
+
tbl = RHASH_TBL(hash);
|
356
|
+
|
357
|
+
while (from_id <= upto_id) {
|
358
|
+
st_insert(tbl, from_id, Qtrue);
|
359
|
+
INCR_FIXNUM_ID(from_id);
|
360
|
+
}
|
361
|
+
|
362
|
+
return upto_id;
|
363
|
+
}
|
364
|
+
|
365
|
+
inline static void
|
366
|
+
insert_fixnum_id(st_table *tbl, VALUE id, int ucp_only) {
|
367
|
+
if (!ucp_only || id <= 0x1B000 || id >= 0x1C000) {
|
368
|
+
st_insert(tbl, id, Qtrue);
|
369
|
+
}
|
370
|
+
}
|
371
|
+
|
372
|
+
// Returns a new set that is a XOR result of SET and the given RANGE.
|
373
|
+
static VALUE
|
374
|
+
method_invert_fixnum_set(VALUE self, VALUE set, VALUE range, VALUE ucp) {
|
375
|
+
VALUE fixnum_id, upto_id, new_hash, new_set, entry;
|
376
|
+
st_index_t size, i;
|
377
|
+
int ucp_only;
|
378
|
+
st_table *new_tbl;
|
379
|
+
struct LOC_st_stable_entry *entries;
|
380
|
+
|
381
|
+
GET_RANGE_FIXNUM_IDS(range, fixnum_id, upto_id);
|
382
|
+
ucp_only = ucp != Qfalse && ucp != Qnil && ucp != Qundef;
|
383
|
+
|
384
|
+
// get set members
|
385
|
+
entries = set_entries_ptr(set, &size);
|
386
|
+
|
387
|
+
// prepare new Set
|
388
|
+
new_set = rb_class_new_instance(0, 0, RBASIC(set)->klass);
|
389
|
+
new_hash = rb_hash_new();
|
390
|
+
new_tbl = RHASH_TBL(new_hash);
|
391
|
+
rb_iv_set(new_set, "@hash", new_hash);
|
392
|
+
|
393
|
+
if (size) {
|
394
|
+
i = 0;
|
395
|
+
entry = entries[i].key;
|
396
|
+
|
397
|
+
// here is the optimization: skipping unneeded comparisons with lower values
|
398
|
+
for (;;) {
|
399
|
+
if (fixnum_id == entry) {
|
400
|
+
// fixnum_id is in set, compare next fixnum with next set member
|
401
|
+
entry = entries[++i].key;
|
402
|
+
INCR_FIXNUM_ID(fixnum_id);
|
403
|
+
if (i == size || fixnum_id > upto_id) break;
|
404
|
+
}
|
405
|
+
else if (fixnum_id < entry) {
|
406
|
+
// fixnum_id is not in set, include in inversion
|
407
|
+
insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
|
408
|
+
INCR_FIXNUM_ID(fixnum_id);
|
409
|
+
if (fixnum_id > upto_id) break;
|
410
|
+
}
|
411
|
+
else /* if (fixnum_id > entry) */ {
|
412
|
+
// gap; fixnum_id might be in set, check next set member
|
413
|
+
entry = entries[++i].key;
|
414
|
+
if (i == size) break;
|
415
|
+
}
|
416
|
+
}
|
417
|
+
}
|
418
|
+
|
419
|
+
// include all fixnums beyond the range of the set
|
420
|
+
while (fixnum_id <= upto_id) {
|
421
|
+
insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
|
422
|
+
INCR_FIXNUM_ID(fixnum_id);
|
423
|
+
}
|
424
|
+
|
425
|
+
set_max_ivar_for_set(new_set);
|
426
|
+
rb_obj_freeze(new_hash);
|
427
|
+
|
428
|
+
return new_set;
|
429
|
+
}
|
430
|
+
|
431
|
+
void Init_immutable_set() {
|
432
|
+
VALUE mod;
|
433
|
+
mod = rb_define_module("ImmutableSetExt");
|
434
|
+
rb_define_singleton_method(mod, "difference", method_difference, 2);
|
435
|
+
rb_define_singleton_method(mod, "exclusion", method_exclusion, 2);
|
436
|
+
rb_define_singleton_method(mod, "fill_with_fixnums", method_fill_with_fixnums, 2);
|
437
|
+
rb_define_singleton_method(mod, "intersect?", method_intersect_p, 2);
|
438
|
+
rb_define_singleton_method(mod, "intersection", method_intersection, 2);
|
439
|
+
rb_define_singleton_method(mod, "invert_fixnum_set", method_invert_fixnum_set, 3);
|
440
|
+
rb_define_singleton_method(mod, "subset?", method_subset_p, 2);
|
441
|
+
rb_define_singleton_method(mod, "superset?", method_superset_p, 2);
|
442
|
+
rb_define_singleton_method(mod, "union", method_union, 2);
|
443
|
+
}
|
444
|
+
|
445
|
+
#endif // end of #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES ... #else ...
|