immutable_set 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +31 -0
- data/.rspec +3 -0
- data/.travis.yml +10 -0
- data/BENCHMARK.md +131 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +83 -0
- data/Rakefile +85 -0
- data/bin/console +18 -0
- data/bin/setup +8 -0
- data/ext/immutable_set/extconf.rb +7 -0
- data/ext/immutable_set/immutable_set.c +445 -0
- data/immutable_set.gemspec +33 -0
- data/lib/immutable_set.rb +50 -0
- data/lib/immutable_set/builder_methods.rb +60 -0
- data/lib/immutable_set/disable_mutating_methods.rb +12 -0
- data/lib/immutable_set/inversion.rb +13 -0
- data/lib/immutable_set/native_ext.rb +19 -0
- data/lib/immutable_set/pure.rb +5 -0
- data/lib/immutable_set/ruby_fallback.rb +148 -0
- data/lib/immutable_set/stdlib_set_method_overrides.rb +155 -0
- data/lib/immutable_set/version.rb +3 -0
- metadata +137 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 142c33ab1a414a70f1d310c511174e4ca1491c25b73062057595b93412f4be54
|
4
|
+
data.tar.gz: 8b2bf71c740c2c1273c33524a0d9b5465a3bbfdc373079dd70025ec4da4a3804
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a1f80fe0c093925ed2012facf812be01d065e164199adb4d03e15ae311ab3c34d0a705fc86d90b9d187b9f3519bf3076e65199e566ea6f7b9fdd6b7c0f647871
|
7
|
+
data.tar.gz: 59628f65aad7609e958b1f8fa444b5d855dc87a66d5801ef74b099bb34dd76bcf9ce3897bcead01e5f0d73c38abe7c10f37da5498487bb7d37abb66e100481f8
|
data/.gitignore
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
*.bundle
|
2
|
+
*.gem
|
3
|
+
*.iml
|
4
|
+
*.stTheme.cache
|
5
|
+
*.sublime-project
|
6
|
+
*.sublime-workspace
|
7
|
+
*.swp
|
8
|
+
*.tmlanguage.cache
|
9
|
+
*.tmPreferences.cache
|
10
|
+
*~
|
11
|
+
.byebug_history
|
12
|
+
.DS_Store
|
13
|
+
.idea/
|
14
|
+
.ruby-gemset
|
15
|
+
.ruby-version
|
16
|
+
.tags
|
17
|
+
.tags1
|
18
|
+
bbin/
|
19
|
+
binstubs/*
|
20
|
+
bundler_stubs/*/.yardoc
|
21
|
+
Gemfile.lock
|
22
|
+
/.bundle/
|
23
|
+
/_yardoc/
|
24
|
+
/coverage/
|
25
|
+
/doc/
|
26
|
+
/pkg/
|
27
|
+
/spec/reports/
|
28
|
+
/tmp/
|
29
|
+
|
30
|
+
# rspec failure tracking
|
31
|
+
.rspec_status
|
data/.rspec
ADDED
data/.travis.yml
ADDED
data/BENCHMARK.md
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
Results of `rake:benchmark` on ruby 2.5.1p57 (2018-03-29 revision 63029) [x86_64-darwin17]
|
2
|
+
|
3
|
+
Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. If the `rbtree` gem is present, `SortedSet` will [use it](https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724) and become even slower.
|
4
|
+
|
5
|
+
```
|
6
|
+
#- with 5M overlapping items
|
7
|
+
gem: 6.6 i/s
|
8
|
+
gem w/o c: 0.8 i/s - 7.85x slower
|
9
|
+
stdlib: 0.7 i/s - 9.51x slower```
|
10
|
+
```
|
11
|
+
#- with 5M distinct items
|
12
|
+
gem: 1429392.7 i/s
|
13
|
+
gem w/o c: 1414260.7 i/s - same-ish
|
14
|
+
stdlib: 1.0 i/s - 1456728.62x slower```
|
15
|
+
```
|
16
|
+
#^ with 5M overlapping items
|
17
|
+
gem: 0.9 i/s
|
18
|
+
gem w/o C: 0.4 i/s - 2.12x slower
|
19
|
+
stdlib: 0.4 i/s - 2.16x slower
|
20
|
+
```
|
21
|
+
```
|
22
|
+
#^ with 5M distinct items
|
23
|
+
gem w/o C: 0.8 i/s
|
24
|
+
gem: 0.6 i/s - 1.25x slower
|
25
|
+
stdlib: 0.5 i/s - 1.65x slower
|
26
|
+
```
|
27
|
+
```
|
28
|
+
#intersect? with 5M intersecting items
|
29
|
+
gem: 266.8 i/s
|
30
|
+
gem w/o C: 8.2 i/s - 32.53x slower
|
31
|
+
stdlib: 2.2 i/s - 121.88x slower
|
32
|
+
```
|
33
|
+
```
|
34
|
+
#intersect? with 5M sparse items (rare case?)
|
35
|
+
gem w/o C: 1442.5 i/s
|
36
|
+
gem: 185.2 i/s - 7.79x slower
|
37
|
+
stdlib: 2.0 i/s - 712.75x slower
|
38
|
+
```
|
39
|
+
```
|
40
|
+
#intersect? with 5M distinct items
|
41
|
+
gem: 1376038.3 i/s
|
42
|
+
gem w/o C: 1375048.5 i/s - same-ish
|
43
|
+
stdlib: 2.0 i/s - 675307.67x slower
|
44
|
+
```
|
45
|
+
```
|
46
|
+
#& with 5M intersecting items
|
47
|
+
gem: 6.4 i/s
|
48
|
+
gem w/o C: 2.6 i/s - 2.49x slower
|
49
|
+
Array#&: 1.3 i/s - 4.83x slower
|
50
|
+
stdlib: 0.9 i/s - 6.90x slower
|
51
|
+
```
|
52
|
+
```
|
53
|
+
#& with 5M sparse items (rare case?)
|
54
|
+
gem: 88.3 i/s
|
55
|
+
gem w/o C: 19.6 i/s - 4.50x slower
|
56
|
+
stdlib: 2.0 i/s - 44.46x slower
|
57
|
+
Array#&: 1.8 i/s - 49.61x slower
|
58
|
+
```
|
59
|
+
```
|
60
|
+
#& with 5M distinct items
|
61
|
+
gem w/o C: 578891.9 i/s
|
62
|
+
gem: 571604.2 i/s - same-ish
|
63
|
+
stdlib: 2.1 i/s - 281016.75x slower
|
64
|
+
Array#&: 1.8 i/s - 316493.80x slower
|
65
|
+
```
|
66
|
+
```
|
67
|
+
#inversion with 5M items
|
68
|
+
gem: 1.8 i/s
|
69
|
+
gem w/o C: 0.7 i/s - 2.58x slower
|
70
|
+
stdlib #-: 0.3 i/s - 6.67x slower
|
71
|
+
```
|
72
|
+
```
|
73
|
+
#inversion with 100k items
|
74
|
+
gem: 239.5 i/s
|
75
|
+
gem w/o C: 62.8 i/s - 3.81x slower
|
76
|
+
stdlib #-: 29.2 i/s - 8.22x slower
|
77
|
+
```
|
78
|
+
```
|
79
|
+
#minmax with 10M items
|
80
|
+
gem: 3180102.2 i/s
|
81
|
+
gem w/o C: 3170355.3 i/s - same-ish
|
82
|
+
stdlib: 5.3 i/s - 595743.46x slower
|
83
|
+
```
|
84
|
+
```
|
85
|
+
#minmax with 1M items
|
86
|
+
gem: 3247178.7 i/s
|
87
|
+
gem w/o C: 3231669.0 i/s - same-ish
|
88
|
+
stdlib: 52.8 i/s - 61535.19x slower
|
89
|
+
```
|
90
|
+
```
|
91
|
+
::new with 5M Range items
|
92
|
+
gem: 0.8 i/s
|
93
|
+
gem w/o C: 0.6 i/s - 1.27x slower
|
94
|
+
stdlib: 0.4 i/s - 1.78x slower
|
95
|
+
```
|
96
|
+
```
|
97
|
+
::new with 100k Range items
|
98
|
+
gem: 126.7 i/s
|
99
|
+
gem w/o C: 69.2 i/s - 1.83x slower
|
100
|
+
stdlib: 33.1 i/s - 3.83x slower
|
101
|
+
```
|
102
|
+
```
|
103
|
+
::new with 10k Range items in 10 non-continuous Ranges
|
104
|
+
gem: 3117.6 i/s
|
105
|
+
gem w/o C: 1326.2 i/s - 2.35x slower
|
106
|
+
stdlib: 666.7 i/s - 4.68x slower
|
107
|
+
```
|
108
|
+
```
|
109
|
+
#(proper_)subset/superset? with 5M subset items
|
110
|
+
gem: 50.8 i/s
|
111
|
+
gem w/o C: 1.4 i/s - 37.61x slower
|
112
|
+
stdlib: 1.3 i/s - 37.71x slower
|
113
|
+
```
|
114
|
+
```
|
115
|
+
#(proper_)subset/superset? with 5M overlapping items
|
116
|
+
gem: 51.0 i/s
|
117
|
+
gem w/o C: 1.4 i/s - 36.49x slower
|
118
|
+
stdlib: 1.4 i/s - 36.74x slower
|
119
|
+
```
|
120
|
+
```
|
121
|
+
#(proper_)subset/superset? with 100k overlapping items
|
122
|
+
gem: 3238.3 i/s
|
123
|
+
stdlib: 302.9 i/s - 10.69x slower
|
124
|
+
gem w/o C: 281.8 i/s - 11.49x slower
|
125
|
+
```
|
126
|
+
```
|
127
|
+
#+ with 5M overlapping items
|
128
|
+
gem: 1.4 i/s
|
129
|
+
stdlib: 1.2 i/s - 1.19x slower
|
130
|
+
gem w/o C: 0.9 i/s - 1.49x slower
|
131
|
+
```
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2018 Jannosch Müller
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/immutable_set.svg)](http://badge.fury.io/rb/immutable_set)
|
2
|
+
[![Build Status](https://travis-ci.org/janosch-x/immutable_set.svg?branch=master)](https://travis-ci.org/janosch-x/immutable_set)
|
3
|
+
|
4
|
+
# ImmutableSet
|
5
|
+
|
6
|
+
A faster, immutable replacement for Ruby's [`Set`](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html).
|
7
|
+
|
8
|
+
On Ruby >= 2.4, all operations are faster, some by several orders of magnitude (see [benchmarks](./BENCHMARK.md)).
|
9
|
+
|
10
|
+
#### Useful for ...
|
11
|
+
|
12
|
+
- creating and working with large sorted sets
|
13
|
+
- intersecting, merging, diffing, checking for subsets etc.
|
14
|
+
- the [advantages of immutability](https://hackernoon.com/f98e7e85b6ac)
|
15
|
+
|
16
|
+
#### Not useful for ...
|
17
|
+
|
18
|
+
- small sets and other cases where performance is negligible
|
19
|
+
- sets with mixed members or any members that are not mutually comparable
|
20
|
+
- doing a lot of adding, removing, and checking of single items
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
require 'immutable_set'
|
26
|
+
|
27
|
+
class MySet < ImmutableSet; end
|
28
|
+
```
|
29
|
+
|
30
|
+
Mutating methods of `Set` (e.g. `#add`, `#delete`) are removed. They can be substituted like this if needed:
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
set1 = MySet[1, 2, 3]
|
34
|
+
set1 += MySet[4] # => MySet[1, 2, 3, 4]
|
35
|
+
set1 -= MySet[3] # => MySet[1, 2, 4]
|
36
|
+
```
|
37
|
+
|
38
|
+
Immutability is required for most of the [performance optimizations](#performance-optimizations).
|
39
|
+
|
40
|
+
All other methods behave as in `Set`/`SortedSet`, so see the [official documentation](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html) for details about what they do.
|
41
|
+
|
42
|
+
## New methods
|
43
|
+
|
44
|
+
**#distinct_bounds?**
|
45
|
+
|
46
|
+
Returns true iff the passed set is beyond the `#minmax` boundaries of `self`.
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
MySet[2, 4].distinct_bounds?(MySet[3]) # => false
|
50
|
+
MySet[2, 4].distinct_bounds?(MySet[5]) # => true
|
51
|
+
```
|
52
|
+
|
53
|
+
**::from_ranges**
|
54
|
+
|
55
|
+
Returns a set built from all passed `Ranges`.
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
MySet.from_ranges(2..4, 6..8) # => MySet[2, 3, 4, 6, 7, 8]
|
59
|
+
```
|
60
|
+
|
61
|
+
**#inversion**
|
62
|
+
|
63
|
+
Returns a new set of the same class, containing all members `from`..`upto` that are not in `self`. Faster than `Set.new(from..upto) - self`.
|
64
|
+
|
65
|
+
```ruby
|
66
|
+
MySet[3, 5].inversion(from: 1, upto: 4) # => MySet[1, 2, 4]
|
67
|
+
MySet['c'].inversion(from: 'a', upto: 'd') # => MySet['a', 'b', 'd']
|
68
|
+
```
|
69
|
+
|
70
|
+
## Performance optimizations
|
71
|
+
|
72
|
+
The cost of many methods is reduced from O(m*n) to O(m+n) or better. The underlying ideas are:
|
73
|
+
|
74
|
+
- never needing to sort, because the internal `@hash` is built in order and then frozen
|
75
|
+
- remembering `#max` cheaply whenever possible
|
76
|
+
- this allows skipping unneeded checks for members outside the own `#minmax` boundaries
|
77
|
+
- avoiding unneeded lookups during comparisons by iterating over both sets in parallel in C
|
78
|
+
- parallel iteration can skip over gaps in either set since both hashes are ordered
|
79
|
+
- when using Ruby, preferring `#while` over slower, scope-building iteration methods
|
80
|
+
|
81
|
+
## Benchmarks
|
82
|
+
|
83
|
+
Run `rake benchmark` or see [BENCHMARK.md](./BENCHMARK.md).
|
data/Rakefile
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'bundler/gem_tasks'
require 'rspec/core/rake_task'

# `rake spec` runs the RSpec suite; it is also the default task.
RSpec::Core::RakeTask.new(:spec)

task default: :spec

require 'rake/extensiontask'

# `rake compile` builds ext/immutable_set and drops the shared object into
# lib/immutable_set so `require 'immutable_set/immutable_set'` finds it.
Rake::ExtensionTask.new('immutable_set') do |ext|
  ext.lib_dir = 'lib/immutable_set'
end
|
13
|
+
|
14
|
+
desc 'Download relevant ruby/spec tests, adapt to ImmutableSet and its variants'
task :sync_ruby_spec do
  require 'fileutils'

  # class name => directory its adapted specs live in
  variants = {
    'ImmutableSet'       => './spec/ruby-spec/library/immutable_set',
    'ImmutableSet::Pure' => './spec/ruby-spec/library/immutable_set_pure',
  }

  # Fix: the original checked `File.exist?(base_dir)` here, but `base_dir`
  # was never defined, so the task raised NameError. Check each variant's
  # own directory instead.
  variants.each_value { |dir| FileUtils.rm_rf(dir) if File.exist?(dir) }

  # export the upstream SortedSet specs into the first variant's directory
  base_dir = variants.first[1]
  `svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{base_dir}`

  # copy the exported specs to every other variant
  variants.each_value { |dir| FileUtils.copy_entry(base_dir, dir) unless dir == base_dir }

  variants.each.with_index do |(class_name, dir), i|
    Dir["#{dir}/**/*.rb"].each do |spec|
      # drop specs for mutating methods — ImmutableSet removes them
      if spec =~ %r{/(add|append|case|clear|collect|delete|filter|flatten|
                      initialize|keep_if|map|merge|replace|reject|select|subtract)}x
        File.delete(spec)
        next
      end

      # `i` must be added to shared example names or they'll override each other
      adapted_content =
        File
        .read(spec)
        .gsub('SortedSet', class_name)
        .gsub('sorted_set_', "sorted_set_#{i}_")
        .gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |method|')
        .gsub('@method', 'method')
        .gsub(/be_(false|true)/, 'be \1')
        .gsub('mock', 'double')

      File.open(spec, 'w') { |f| f.puts adapted_content }
    end
  end
end
|
52
|
+
|
53
|
+
desc 'Run all IPS benchmarks'
task :benchmark do
  # each file under ./benchmarks runs its benchmark at require time
  Dir['./benchmarks/*.rb'].sort.each { |file| require file }
end
|
57
|
+
|
58
|
+
namespace :benchmark do
  desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
  task :write_to_file do
    # the benchmark scripts check this global and record their comparison
    # output (caption => ips comparison text) into it when it is set
    $store_comparison_results = {}

    Rake.application[:benchmark].invoke

    File.open('BENCHMARK.md', 'w') do |f|
      # Fix: the original left a dangling comma after the final '' argument,
      # which made the whole `$store_comparison_results.each` expression a
      # further argument to f.puts and dumped the raw hash into the file.
      f.puts "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}",
             '',
             'Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. '\
             'If the `rbtree` gem is present, `SortedSet` will [use it]'\
             '(https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724)'\
             ' and become even slower.',
             ''

      $store_comparison_results.each do |caption, result|
        # strip the ips header line and truncate "same-ish" noise
        f.puts '```', caption, result.strip.gsub(/(same-ish).*$/, '\1').lines[1..-1], '```'
      end
    end
  end
end
|
80
|
+
|
81
|
+
# JRuby cannot build the C extension, so only add the compile prerequisite
# on C Ruby platforms.
unless RUBY_PLATFORM =~ /java/
  # recompile before benchmarking or running specs
  task(:benchmark).enhance([:compile])
  task(:spec).enhance([:compile])
end
|
data/bin/console
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
#!/usr/bin/env ruby

require 'bundler/setup'
require 'immutable_set'

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

# short aliases for interactively comparing the three implementations
class S < SortedSet; end
class I < ImmutableSet; end
class P < ImmutableSet::Pure; end

require "irb"
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1,445 @@
|
|
1
|
+
// This speeds up set methods that take an enum by iterating both in parallel.
|
2
|
+
// Simple sanity checks and casting the arg to set are done in Ruby beforehand.
|
3
|
+
// Internal hashes of the recipient and arg must have been created in order.
|
4
|
+
|
5
|
+
#ifndef HAVE_STRUCT_ST_TABLE_ENTRIES
|
6
|
+
// the optional extension doesn't work for ruby < 2.4, skip defining module
|
7
|
+
void Init_immutable_set() {}
|
8
|
+
#else
|
9
|
+
|
10
|
+
#include "ruby.h"
|
11
|
+
#include "ruby/st.h"
|
12
|
+
|
13
|
+
enum iter_state {A_LT_B = -1, A_EQ_B = 0, A_GT_B = 1, EOF_A = -2, EOF_B = -3};
|
14
|
+
enum iter_action {ITER_ADVANCE_A, ITER_ADVANCE_B, ITER_ADVANCE_BOTH, ITER_END};
|
15
|
+
|
16
|
+
typedef enum iter_state(*compare_function)(VALUE, VALUE);
|
17
|
+
typedef enum iter_action(*comp_callback)(enum iter_state, VALUE*);
|
18
|
+
typedef enum iter_action(*proc_callback)(enum iter_state, VALUE*, VALUE, VALUE);
|
19
|
+
|
20
|
+
// Fast-path comparator for Fixnum members: compares the tagged VALUE words
// directly, avoiding a #<=> dispatch per pair.
// NOTE(review): VALUE is unsigned, so negative Fixnums (high bit set in the
// tagged word) would compare greater than positive ones — this assumes sets
// of non-negative Integers (e.g. codepoints). TODO confirm.
static enum iter_state
compare_fixnum_values(VALUE a, VALUE b) {
    if (a < b) return A_LT_B;
    if (a > b) return A_GT_B;
    return A_EQ_B;
}

// Generic comparator: calls a.<=>(b) and normalizes the result to -1/0/1
// (raising via rb_cmpint if the two objects are not comparable).
static enum iter_state
compare_any_values(VALUE a, VALUE b) {
    return rb_cmpint(rb_funcallv(a, rb_intern("<=>"), 1, &b), a, b);
}
|
31
|
+
|
32
|
+
#ifndef STRING_P
# define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
#endif

// Picks the cheapest comparator both sets support, judged by the class of
// their cached @max members: Fixnum-only sets compare tagged words, plain
// String sets use rb_str_cmp, everything else falls back to #<=>.
// NOTE(review): rb_str_cmp returns int rather than enum iter_state — the
// values (-1/0/1) line up, but the pointer types differ, so compilers may
// warn about this assignment.
static compare_function
optimal_compare_function(VALUE set_a, VALUE set_b) {
    VALUE max_a, max_b;

    max_a = rb_iv_get(set_a, "@max");
    max_b = rb_iv_get(set_b, "@max");

    if (FIXNUM_P(max_a) && FIXNUM_P(max_b)) return compare_fixnum_values;
    if (STRING_P(max_a) && STRING_P(max_b)) return rb_str_cmp;
    return compare_any_values;
}
|
47
|
+
|
48
|
+
// Local mirror of CRuby's internal st_table entry layout (>= 2.4, where
// hash tables keep their entries in insertion order). Only the `key` field
// is read here.
struct LOC_st_stable_entry {
    st_index_t hash;
    st_data_t key;
    st_data_t record;
};

// Returns a pointer to the ordered entry array of SET's internal @hash and
// writes the entry count to *size_ptr.
// NOTE(review): reaches into private st_table internals; only safe while the
// hash is not mutated — the gem builds its hashes in order and freezes them.
static struct LOC_st_stable_entry*
set_entries_ptr(VALUE set, st_index_t* size_ptr) {
    VALUE hash;

    hash = rb_iv_get(set, "@hash");
    *size_ptr = RHASH_SIZE(hash);

    return (struct LOC_st_stable_entry*)RHASH_TBL(hash)->entries;
}
|
63
|
+
|
64
|
+
// Core engine: walks the ordered entry arrays of set_a and set_b in
// parallel. Each round compares the current pair with the chosen comparator
// and asks the callback what to do: advance a, advance b, advance both, or
// stop. Reaching the end of either array re-enters the callback (via goto)
// with EOF_A / EOF_B so it can settle the final result. Expands in functions
// that have `set_a`, `set_b`, `callback` and `memo` in scope; returns `memo`.
// Empty inputs short-circuit and return `memo` unchanged.
#define PARALLEL_ITERATE(...) \
  st_index_t size_a, size_b, i, j; \
  compare_function compare_func; \
  enum iter_state state; \
  struct LOC_st_stable_entry *entries_a, *entries_b; \
  VALUE entry_a, entry_b; \
  \
  entries_a = set_entries_ptr(set_a, &size_a); \
  entries_b = set_entries_ptr(set_b, &size_b); \
  if (!size_a || !size_b) return memo; \
  \
  i = j = 0; \
  entry_a = entries_a[i].key; \
  entry_b = entries_b[j].key; \
  compare_func = optimal_compare_function(set_a, set_b); \
  \
  for (;;) { \
    state = (*compare_func)(entry_a, entry_b); \
    \
    eval_state: \
    switch((*callback)(state, __VA_ARGS__)) { \
      case ITER_ADVANCE_A: \
        if (++i >= size_a) { state = EOF_A; goto eval_state; } \
        entry_a = entries_a[i].key; \
        continue; \
      case ITER_ADVANCE_B: \
        if (++j >= size_b) { state = EOF_B; goto eval_state; } \
        entry_b = entries_b[j].key; \
        continue; \
      case ITER_ADVANCE_BOTH: \
        if (++i >= size_a) { state = EOF_A; goto eval_state; } \
        entry_a = entries_a[i].key; \
        if (++j >= size_b) { state = EOF_B; goto eval_state; } \
        entry_b = entries_b[j].key; \
        continue; \
      case ITER_END: \
        return memo; \
    } \
  } \

// Runs the parallel walk with a comparison callback (state + memo only);
// used by the predicate methods (subset?, intersect?, ...).
static VALUE
parallel_compare(VALUE set_a, VALUE set_b, comp_callback callback, VALUE memo) {
    PARALLEL_ITERATE(&memo);
}

// Runs the parallel walk with a processing callback that also receives the
// two current entries; used by the set-building methods (&, +, -, ^).
static VALUE
parallel_process(VALUE set_a, VALUE set_b, proc_callback callback, VALUE memo) {
    PARALLEL_ITERATE(&memo, entry_a, entry_b);
}
|
113
|
+
|
114
|
+
// Callback: decides whether the first set is contained in the second while
// both are walked in sorted order.
static enum iter_action
check_first_subset_of_second(enum iter_state state, VALUE* memo) {
    switch(state) {
    case A_LT_B: *memo = Qfalse; break; // entry_a not in set_b
    case A_EQ_B: return ITER_ADVANCE_BOTH;
    case A_GT_B: return ITER_ADVANCE_B;
    case EOF_A: *memo = Qtrue; break; // checked all in set_a
    case EOF_B: *memo = Qfalse; break; // no more comparandi in set_b
    }
    return ITER_END;
}

// Returns Qtrue if SET_A is a subset (proper or not) of SET_B, else Qfalse.
static VALUE
method_subset_p(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_compare(set_a, set_b, check_first_subset_of_second, Qfalse);
}

// Returns Qtrue if SET_A is a superset (proper or not) of SET_B, else Qfalse.
// (The same check with the arguments swapped.)
static VALUE
method_superset_p(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_compare(set_b, set_a, check_first_subset_of_second, Qfalse);
}
|
137
|
+
|
138
|
+
// TODO: if (a > b max || b > a max) *memo = Qfalse; break; ?
// Callback: reports Qtrue at the first member the two sets share; advancing
// whichever side is behind skips over gaps without hash lookups.
static enum iter_action
check_if_intersect(enum iter_state state, VALUE* memo) {
    switch(state) {
    case A_LT_B: return ITER_ADVANCE_A;
    case A_EQ_B: *memo = Qtrue; break; // found common member
    case A_GT_B: return ITER_ADVANCE_B;
    case EOF_A: *memo = Qfalse; break;
    case EOF_B: *memo = Qfalse; break;
    }
    return ITER_END;
}

// Returns Qtrue if SET_A intersects with SET_B, else Qfalse.
static VALUE
method_intersect_p(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_compare(set_a, set_b, check_if_intersect, Qfalse);
}
|
156
|
+
|
157
|
+
// Caches the largest member — the last entry of the ordered @hash — in @max
// so later calls can pick a cheap comparator and skip out-of-bounds work.
// Leaves @max untouched (nil) for empty sets.
static void
set_max_ivar_for_set(VALUE set) {
    struct LOC_st_stable_entry *entries;
    st_index_t size;

    entries = set_entries_ptr(set, &size);
    if (size) rb_iv_set(set, "@max", entries[size - 1].key);
}
|
165
|
+
|
166
|
+
// Named slots of the VALUE memo[3] array shared with the build callbacks
// (the callbacks receive it packed into a single VALUE).
#define MEMO_HASH (memo[0])
#define MEMO_SET_A_DEPLETED (memo[1])
#define MEMO_SET_B_DEPLETED (memo[2])

// helper to process two sets and build a new one in parallel:
// allocates an empty set of set_a's class, lets PROC fill its @hash while
// both inputs are walked in sorted order, then caches @max and freezes the
// hash to keep the result immutable.
static VALUE
parallel_build(VALUE set_a, VALUE set_b, proc_callback proc) {
    VALUE new_set, new_hash, memo[3];

    // prepare new Set
    new_set = rb_class_new_instance(0, 0, RBASIC(set_a)->klass);
    new_hash = rb_hash_new();
    rb_iv_set(new_set, "@hash", new_hash);

    MEMO_HASH = new_hash;
    MEMO_SET_A_DEPLETED = 0;
    MEMO_SET_B_DEPLETED = 0;

    parallel_process(set_a, set_b, proc, (VALUE)memo);

    set_max_ivar_for_set(new_set);
    rb_obj_freeze(new_hash);

    return new_set;
}
|
191
|
+
|
192
|
+
// Callback for #&: inserts only members present in both sets (A_EQ_B);
// all other states just advance the lagging side. Insertion order is
// preserved because both inputs are walked in sorted order.
static enum iter_action
add_shared_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
    VALUE *memo; // unpacked lazily — only the A_EQ_B case needs it

    switch(state) {
    case A_LT_B: return ITER_ADVANCE_A;
    case A_EQ_B:
        memo = (VALUE*)*memp;
        st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
        return ITER_ADVANCE_BOTH;
    case A_GT_B: return ITER_ADVANCE_B;
    case EOF_A: break;
    case EOF_B: break;
    }
    return ITER_END;
}

// Returns a new set containing all members shared by SET_A and SET_B.
static VALUE
method_intersection(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_build(set_a, set_b, add_shared_to_hash);
}
|
214
|
+
|
215
|
+
// Callback for #+/#|: inserts every member of either set, de-duplicating by
// advancing both sides on A_EQ_B. Once one side runs dry its DEPLETED flag
// is set; the comparison keeps firing against that side's (stale) last
// entry, so the flag reroutes those states to copy the other side's
// leftovers straight through. Re-inserting an existing key is a no-op,
// so the unconditional inserts at EOF_A/EOF_B are safe.
static enum iter_action
add_any_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
    VALUE *memo = (VALUE*)*memp;

    switch(state) {
    case A_LT_B:
        if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
            st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
            return ITER_ADVANCE_B;
        }
        st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
        return ITER_ADVANCE_A;
    case A_EQ_B:
        st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
        return ITER_ADVANCE_BOTH; // shared member
    case A_GT_B:
        if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
            st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
            return ITER_ADVANCE_A;
        }
        st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
        return ITER_ADVANCE_B;
    case EOF_A:
        st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
        MEMO_SET_A_DEPLETED = 1;
        if (MEMO_SET_B_DEPLETED) break; // break if both sets depleted
        return ITER_ADVANCE_B;
    case EOF_B:
        st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
        MEMO_SET_B_DEPLETED = 1;
        if (MEMO_SET_A_DEPLETED) break; // break if both sets depleted
        return ITER_ADVANCE_A;
    }
    return ITER_END;
}

// Returns a new set that includes all members of SET_A and/or SET_B.
static VALUE
method_union(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_build(set_a, set_b, add_any_members_to_hash);
}
|
256
|
+
|
257
|
+
// Inserts VAL into HSH unless it #<=>-equals OTHER; used to settle the last
// pending entry when an EOF arrives while the other side's entry is stale.
#define INSERT_UNLESS_EQUAL(val, other, hsh) \
  if (compare_any_values(val, other)) { st_insert(RHASH_TBL(hsh), val, Qtrue); }

// Callback for #-: keeps members of set a that are not in set b, dropping
// shared members by advancing both sides on A_EQ_B.
static enum iter_action
add_nonb_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
    VALUE *memo = (VALUE*)*memp;

    switch(state) {
    case A_LT_B:
        st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
        return ITER_ADVANCE_A;
    case A_EQ_B:
        return ITER_ADVANCE_BOTH; // shared member
    case A_GT_B:
        if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
            st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
            return ITER_ADVANCE_A;
        }
        return ITER_ADVANCE_B;
    case EOF_A:
        // if set b is also depleted, add a unless equal to final b
        if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); }
        break;
    case EOF_B:
        MEMO_SET_B_DEPLETED = 1;
        return ITER_ADVANCE_A;
    }
    return ITER_END;
}

// Returns a new set with the members of SET_A that are not in SET_B.
// (The previous comment here wrongly described a union.)
static VALUE
method_difference(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_build(set_a, set_b, add_nonb_members_to_hash);
}
|
292
|
+
|
293
|
+
// Callback for #^: keeps members present in exactly one of the two sets.
// Shared members are skipped on A_EQ_B; once one side is depleted, its flag
// reroutes comparisons so the other side's leftovers are copied through.
// The EOF cases use INSERT_UNLESS_EQUAL because the depleted side's last
// entry is stale and may equal the current entry of the live side.
static enum iter_action
add_xor_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
    VALUE *memo = (VALUE*)*memp;

    switch(state) {
    case A_LT_B:
        if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
            st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
            return ITER_ADVANCE_B;
        }
        st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
        return ITER_ADVANCE_A;
    case A_EQ_B:
        return ITER_ADVANCE_BOTH; // shared member, skip
    case A_GT_B:
        if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
            st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
            return ITER_ADVANCE_A;
        }
        st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
        return ITER_ADVANCE_B;
    case EOF_A:
        // if set b is also depleted, add a unless equal to final b and break
        if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); break; }
        INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); // add b unless equal to final a
        MEMO_SET_A_DEPLETED = 1; // mark set a as depleted
        return ITER_ADVANCE_B;
    case EOF_B:
        // if set a is also depleted, add b unless equal to final a and break
        if (MEMO_SET_A_DEPLETED) { INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); break; }
        INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); // add a unless equal to final b
        MEMO_SET_B_DEPLETED = 1; // mark set b as depleted
        return ITER_ADVANCE_A;
    }
    return ITER_END;
}

// Returns a new set that is a XOR result of SET_A and SET_B.
static VALUE
method_exclusion(VALUE self, VALUE set_a, VALUE set_b) {
    return parallel_build(set_a, set_b, add_xor_members_to_hash);
}
|
335
|
+
|
336
|
+
// CRuby tags a Fixnum n as the VALUE (2n | 1), so stepping by one in
// codepoint space is stepping by two in tagged-VALUE space.
#define INCR_FIXNUM_ID(id) (id += 2)
#define DECR_FIXNUM_ID(id) (id -= 2)

// Extracts the tagged Fixnum bounds of RANGE into from_id/upto_id, making
// the upper bound inclusive. Raises ArgumentError for non-Ranges and
// TypeError (via Check_Type) for non-Fixnum bounds.
#define GET_RANGE_FIXNUM_IDS(range, from_id, upto_id) \
  int excl; \
  if (!rb_range_values(range, &from_id, &upto_id, &excl)) { \
    rb_raise(rb_eArgError, "Pass a Range"); \
  } \
  if (excl) DECR_FIXNUM_ID(upto_id); \
  Check_Type(from_id, T_FIXNUM); \
  Check_Type(upto_id, T_FIXNUM);

// Fills HASH with all Fixnums in RANGE; returns the inclusive upper bound.
static VALUE
method_fill_with_fixnums(VALUE self, VALUE hash, VALUE range) {
    VALUE from_id, upto_id;
    st_table *tbl;

    GET_RANGE_FIXNUM_IDS(range, from_id, upto_id);
    tbl = RHASH_TBL(hash);

    // comparison and increment both work directly on the tagged words,
    // so no untagging is needed while walking the range
    while (from_id <= upto_id) {
        st_insert(tbl, from_id, Qtrue);
        INCR_FIXNUM_ID(from_id);
    }

    return upto_id;
}
|
364
|
+
|
365
|
+
// Inserts a tagged Fixnum id into TBL. With UCP_ONLY set, ids whose
// codepoints are UTF-16 surrogates are skipped: the tagged ids for
// U+D800..U+DFFF are 0x1B001..0x1BFFF, so the guard admits only
// id <= 0x1B000 or id >= 0x1C000. (NOTE(review): derived from the
// 2n|1 tagging; confirm the intended exclusion is the surrogate block.)
inline static void
insert_fixnum_id(st_table *tbl, VALUE id, int ucp_only) {
    if (!ucp_only || id <= 0x1B000 || id >= 0x1C000) {
        st_insert(tbl, id, Qtrue);
    }
}
|
371
|
+
|
372
|
+
// Returns a new set that is a XOR result of SET and the given RANGE.
|
373
|
+
static VALUE
|
374
|
+
method_invert_fixnum_set(VALUE self, VALUE set, VALUE range, VALUE ucp) {
|
375
|
+
VALUE fixnum_id, upto_id, new_hash, new_set, entry;
|
376
|
+
st_index_t size, i;
|
377
|
+
int ucp_only;
|
378
|
+
st_table *new_tbl;
|
379
|
+
struct LOC_st_stable_entry *entries;
|
380
|
+
|
381
|
+
GET_RANGE_FIXNUM_IDS(range, fixnum_id, upto_id);
|
382
|
+
ucp_only = ucp != Qfalse && ucp != Qnil && ucp != Qundef;
|
383
|
+
|
384
|
+
// get set members
|
385
|
+
entries = set_entries_ptr(set, &size);
|
386
|
+
|
387
|
+
// prepare new Set
|
388
|
+
new_set = rb_class_new_instance(0, 0, RBASIC(set)->klass);
|
389
|
+
new_hash = rb_hash_new();
|
390
|
+
new_tbl = RHASH_TBL(new_hash);
|
391
|
+
rb_iv_set(new_set, "@hash", new_hash);
|
392
|
+
|
393
|
+
if (size) {
|
394
|
+
i = 0;
|
395
|
+
entry = entries[i].key;
|
396
|
+
|
397
|
+
// here is the optimization: skipping unneeded comparisons with lower values
|
398
|
+
for (;;) {
|
399
|
+
if (fixnum_id == entry) {
|
400
|
+
// fixnum_id is in set, compare next fixnum with next set member
|
401
|
+
entry = entries[++i].key;
|
402
|
+
INCR_FIXNUM_ID(fixnum_id);
|
403
|
+
if (i == size || fixnum_id > upto_id) break;
|
404
|
+
}
|
405
|
+
else if (fixnum_id < entry) {
|
406
|
+
// fixnum_id is not in set, include in inversion
|
407
|
+
insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
|
408
|
+
INCR_FIXNUM_ID(fixnum_id);
|
409
|
+
if (fixnum_id > upto_id) break;
|
410
|
+
}
|
411
|
+
else /* if (fixnum_id > entry) */ {
|
412
|
+
// gap; fixnum_id might be in set, check next set member
|
413
|
+
entry = entries[++i].key;
|
414
|
+
if (i == size) break;
|
415
|
+
}
|
416
|
+
}
|
417
|
+
}
|
418
|
+
|
419
|
+
// include all fixnums beyond the range of the set
|
420
|
+
while (fixnum_id <= upto_id) {
|
421
|
+
insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
|
422
|
+
INCR_FIXNUM_ID(fixnum_id);
|
423
|
+
}
|
424
|
+
|
425
|
+
set_max_ivar_for_set(new_set);
|
426
|
+
rb_obj_freeze(new_hash);
|
427
|
+
|
428
|
+
return new_set;
|
429
|
+
}
|
430
|
+
|
431
|
+
// Extension entry point, invoked by Ruby on `require`. Registers the native
// helpers as module functions of ImmutableSetExt; the Ruby side validates
// and casts arguments before delegating to them.
void Init_immutable_set() {
    VALUE mod;
    mod = rb_define_module("ImmutableSetExt");
    rb_define_singleton_method(mod, "difference", method_difference, 2);
    rb_define_singleton_method(mod, "exclusion", method_exclusion, 2);
    rb_define_singleton_method(mod, "fill_with_fixnums", method_fill_with_fixnums, 2);
    rb_define_singleton_method(mod, "intersect?", method_intersect_p, 2);
    rb_define_singleton_method(mod, "intersection", method_intersection, 2);
    rb_define_singleton_method(mod, "invert_fixnum_set", method_invert_fixnum_set, 3);
    rb_define_singleton_method(mod, "subset?", method_subset_p, 2);
    rb_define_singleton_method(mod, "superset?", method_superset_p, 2);
    rb_define_singleton_method(mod, "union", method_union, 2);
}
|
444
|
+
|
445
|
+
#endif // end of #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES ... #else ...
|