immutable_set 0.1.0

@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 142c33ab1a414a70f1d310c511174e4ca1491c25b73062057595b93412f4be54
4
+ data.tar.gz: 8b2bf71c740c2c1273c33524a0d9b5465a3bbfdc373079dd70025ec4da4a3804
5
+ SHA512:
6
+ metadata.gz: a1f80fe0c093925ed2012facf812be01d065e164199adb4d03e15ae311ab3c34d0a705fc86d90b9d187b9f3519bf3076e65199e566ea6f7b9fdd6b7c0f647871
7
+ data.tar.gz: 59628f65aad7609e958b1f8fa444b5d855dc87a66d5801ef74b099bb34dd76bcf9ce3897bcead01e5f0d73c38abe7c10f37da5498487bb7d37abb66e100481f8
@@ -0,0 +1,31 @@
1
+ *.bundle
2
+ *.gem
3
+ *.iml
4
+ *.stTheme.cache
5
+ *.sublime-project
6
+ *.sublime-workspace
7
+ *.swp
8
+ *.tmlanguage.cache
9
+ *.tmPreferences.cache
10
+ *~
11
+ .byebug_history
12
+ .DS_Store
13
+ .idea/
14
+ .ruby-gemset
15
+ .ruby-version
16
+ .tags
17
+ .tags1
18
+ bbin/
19
+ binstubs/*
20
+ bundler_stubs/*/.yardoc
21
+ Gemfile.lock
22
+ /.bundle/
23
+ /_yardoc/
24
+ /coverage/
25
+ /doc/
26
+ /pkg/
27
+ /spec/reports/
28
+ /tmp/
29
+
30
+ # rspec failure tracking
31
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
@@ -0,0 +1,10 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.0
5
+ - 2.4
6
+ - 2.5
7
+ - 2.6
8
+ before_install:
9
+ - gem update --system
10
+ - gem install bundler
@@ -0,0 +1,131 @@
1
+ Results of `rake benchmark` on ruby 2.5.1p57 (2018-03-29 revision 63029) [x86_64-darwin17]
2
+
3
+ Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. If the `rbtree` gem is present, `SortedSet` will [use it](https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724) and become even slower.
4
+
5
+ ```
6
+ #- with 5M overlapping items
7
+ gem: 6.6 i/s
8
+ gem w/o c: 0.8 i/s - 7.85x slower
9
+ stdlib: 0.7 i/s - 9.51x slower
+ ```
10
+ ```
11
+ #- with 5M distinct items
12
+ gem: 1429392.7 i/s
13
+ gem w/o c: 1414260.7 i/s - same-ish
14
+ stdlib: 1.0 i/s - 1456728.62x slower
+ ```
15
+ ```
16
+ #^ with 5M overlapping items
17
+ gem: 0.9 i/s
18
+ gem w/o C: 0.4 i/s - 2.12x slower
19
+ stdlib: 0.4 i/s - 2.16x slower
20
+ ```
21
+ ```
22
+ #^ with 5M distinct items
23
+ gem w/o C: 0.8 i/s
24
+ gem: 0.6 i/s - 1.25x slower
25
+ stdlib: 0.5 i/s - 1.65x slower
26
+ ```
27
+ ```
28
+ #intersect? with 5M intersecting items
29
+ gem: 266.8 i/s
30
+ gem w/o C: 8.2 i/s - 32.53x slower
31
+ stdlib: 2.2 i/s - 121.88x slower
32
+ ```
33
+ ```
34
+ #intersect? with 5M sparse items (rare case?)
35
+ gem w/o C: 1442.5 i/s
36
+ gem: 185.2 i/s - 7.79x slower
37
+ stdlib: 2.0 i/s - 712.75x slower
38
+ ```
39
+ ```
40
+ #intersect? with 5M distinct items
41
+ gem: 1376038.3 i/s
42
+ gem w/o C: 1375048.5 i/s - same-ish
43
+ stdlib: 2.0 i/s - 675307.67x slower
44
+ ```
45
+ ```
46
+ #& with 5M intersecting items
47
+ gem: 6.4 i/s
48
+ gem w/o C: 2.6 i/s - 2.49x slower
49
+ Array#&: 1.3 i/s - 4.83x slower
50
+ stdlib: 0.9 i/s - 6.90x slower
51
+ ```
52
+ ```
53
+ #& with 5M sparse items (rare case?)
54
+ gem: 88.3 i/s
55
+ gem w/o C: 19.6 i/s - 4.50x slower
56
+ stdlib: 2.0 i/s - 44.46x slower
57
+ Array#&: 1.8 i/s - 49.61x slower
58
+ ```
59
+ ```
60
+ #& with 5M distinct items
61
+ gem w/o C: 578891.9 i/s
62
+ gem: 571604.2 i/s - same-ish
63
+ stdlib: 2.1 i/s - 281016.75x slower
64
+ Array#&: 1.8 i/s - 316493.80x slower
65
+ ```
66
+ ```
67
+ #inversion with 5M items
68
+ gem: 1.8 i/s
69
+ gem w/o C: 0.7 i/s - 2.58x slower
70
+ stdlib #-: 0.3 i/s - 6.67x slower
71
+ ```
72
+ ```
73
+ #inversion with 100k items
74
+ gem: 239.5 i/s
75
+ gem w/o C: 62.8 i/s - 3.81x slower
76
+ stdlib #-: 29.2 i/s - 8.22x slower
77
+ ```
78
+ ```
79
+ #minmax with 10M items
80
+ gem: 3180102.2 i/s
81
+ gem w/o C: 3170355.3 i/s - same-ish
82
+ stdlib: 5.3 i/s - 595743.46x slower
83
+ ```
84
+ ```
85
+ #minmax with 1M items
86
+ gem: 3247178.7 i/s
87
+ gem w/o C: 3231669.0 i/s - same-ish
88
+ stdlib: 52.8 i/s - 61535.19x slower
89
+ ```
90
+ ```
91
+ ::new with 5M Range items
92
+ gem: 0.8 i/s
93
+ gem w/o C: 0.6 i/s - 1.27x slower
94
+ stdlib: 0.4 i/s - 1.78x slower
95
+ ```
96
+ ```
97
+ ::new with 100k Range items
98
+ gem: 126.7 i/s
99
+ gem w/o C: 69.2 i/s - 1.83x slower
100
+ stdlib: 33.1 i/s - 3.83x slower
101
+ ```
102
+ ```
103
+ ::new with 10k Range items in 10 non-continuous Ranges
104
+ gem: 3117.6 i/s
105
+ gem w/o C: 1326.2 i/s - 2.35x slower
106
+ stdlib: 666.7 i/s - 4.68x slower
107
+ ```
108
+ ```
109
+ #(proper_)subset/superset? with 5M subset items
110
+ gem: 50.8 i/s
111
+ gem w/o C: 1.4 i/s - 37.61x slower
112
+ stdlib: 1.3 i/s - 37.71x slower
113
+ ```
114
+ ```
115
+ #(proper_)subset/superset? with 5M overlapping items
116
+ gem: 51.0 i/s
117
+ gem w/o C: 1.4 i/s - 36.49x slower
118
+ stdlib: 1.4 i/s - 36.74x slower
119
+ ```
120
+ ```
121
+ #(proper_)subset/superset? with 100k overlapping items
122
+ gem: 3238.3 i/s
123
+ stdlib: 302.9 i/s - 10.69x slower
124
+ gem w/o C: 281.8 i/s - 11.49x slower
125
+ ```
126
+ ```
127
+ #+ with 5M overlapping items
128
+ gem: 1.4 i/s
129
+ stdlib: 1.2 i/s - 1.19x slower
130
+ gem w/o C: 0.9 i/s - 1.49x slower
131
+ ```
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in immutable_set.gemspec
6
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Jannosch Müller
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,83 @@
1
+ [![Gem Version](https://badge.fury.io/rb/immutable_set.svg)](http://badge.fury.io/rb/immutable_set)
2
+ [![Build Status](https://travis-ci.org/janosch-x/immutable_set.svg?branch=master)](https://travis-ci.org/janosch-x/immutable_set)
3
+
4
+ # ImmutableSet
5
+
6
+ A faster, immutable replacement for Ruby's [`Set`](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html).
7
+
8
+ On Ruby >= 2.4, all operations are faster, some by several orders of magnitude (see [benchmarks](./BENCHMARK.md)).
9
+
10
+ #### Useful for ...
11
+
12
+ - creating and working with large sorted sets
13
+ - intersecting, merging, diffing, checking for subsets etc.
14
+ - the [advantages of immutability](https://hackernoon.com/f98e7e85b6ac)
15
+
16
+ #### Not useful for ...
17
+
18
+ - small sets and other cases where performance is negligible
19
+ - sets with mixed members or any members that are not mutually comparable
20
+ - doing a lot of adding, removing, and checking of single items
21
+
22
+ ## Usage
23
+
24
+ ```ruby
25
+ require 'immutable_set'
26
+
27
+ class MySet < ImmutableSet; end
28
+ ```
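
For orientation, here is a hypothetical snippet of how such a subclass might then be used. The outputs assume the behaviour documented in the sections below (`::from_ranges` is introduced under "New methods"), so treat this as a sketch rather than captured gem output:

```ruby
a = MySet.from_ranges(1..3, 5..6) # => MySet[1, 2, 3, 5, 6]
b = MySet[3, 4, 5]

a & b           # => MySet[3, 5]
a + b           # => MySet[1, 2, 3, 4, 5, 6]
a.intersect?(b) # => true
a.subset?(b)    # => false
```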
29
+
30
+ Mutating methods of `Set` (e.g. `#add`, `#delete`) are removed. They can be substituted like this if needed:
31
+
32
+ ```ruby
33
+ set1 = MySet[1, 2, 3]
34
+ set1 += MySet[4] # => MySet[1, 2, 3, 4]
35
+ set1 -= MySet[3] # => MySet[1, 2, 4]
36
+ ```
37
+
38
+ Immutability is required for most of the [performance optimizations](#performance-optimizations).
39
+
40
+ All other methods behave as in `Set`/`SortedSet`, so see the [official documentation](https://ruby-doc.org/stdlib-2.5.1/libdoc/set/rdoc/Set.html) for details about what they do.
41
+
42
+ ## New methods
43
+
44
+ **#distinct_bounds?**
45
+
46
+ Returns true iff the passed set lies entirely outside the `#minmax` boundaries of `self`.
47
+
48
+ ```ruby
49
+ MySet[2, 4].distinct_bounds?(MySet[3]) # => false
50
+ MySet[2, 4].distinct_bounds?(MySet[5]) # => true
51
+ ```
52
+
53
+ **::from_ranges**
54
+
55
+ Returns a set built from all passed `Ranges`.
56
+
57
+ ```ruby
58
+ MySet.from_ranges(2..4, 6..8) # => MySet[2, 3, 4, 6, 7, 8]
59
+ ```
60
+
61
+ **#inversion**
62
+
63
+ Returns a new set of the same class, containing every member of the range `from`..`upto` that is not in `self`. Faster than `Set.new(from..upto) - self`.
64
+
65
+ ```ruby
66
+ MySet[3, 5].inversion(from: 1, upto: 4) # => MySet[1, 2, 4]
67
+ MySet['c'].inversion(from: 'a', upto: 'd') # => MySet['a', 'b', 'd']
68
+ ```
69
+
70
+ ## Performance optimizations
71
+
72
+ The cost of many methods is reduced from O(m*n) to O(m+n) or better (a pure-Ruby sketch of the core idea follows this list). The underlying ideas are:
73
+
74
+ - never needing to sort, because the internal `@hash` is built in order and then frozen
75
+ - remembering `#max` cheaply whenever possible
76
+ - this allows skipping unneeded checks for members outside the own `#minmax` boundaries
77
+ - avoiding unneeded lookups during comparisons by iterating over both sets in parallel in C
78
+ - parallel iteration can skip over gaps in either set since both hashes are ordered
79
+ - when using Ruby, preferring plain `while` loops over slower, scope-building iteration methods
80
+
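
To make the parallel-iteration idea concrete, here is a minimal pure-Ruby sketch of intersecting two already-sorted, duplicate-free arrays in O(m+n). It illustrates the technique only; it is not the gem's implementation, which walks the frozen hashes' entries in the C extension shown further below:

```ruby
# Walk both sorted arrays in lockstep; each comparison advances at least one
# index, so the whole intersection costs O(m + n) comparisons.
def sorted_intersection(a, b)
  result = []
  i = j = 0
  while i < a.size && j < b.size
    cmp = a[i] <=> b[j]
    if cmp.zero?        # shared member: keep it, advance both sides
      result << a[i]
      i += 1
      j += 1
    elsif cmp.negative? # a[i] is smaller, it cannot appear later in b
      i += 1
    else                # b[j] is smaller, it cannot appear later in a
      j += 1
    end
  end
  result
end

sorted_intersection([1, 3, 5, 7], [2, 3, 4, 7]) # => [3, 7]
```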
81
+ ## Benchmarks
82
+
83
+ Run `rake benchmark` or see [BENCHMARK.md](./BENCHMARK.md).
@@ -0,0 +1,85 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task default: :spec
7
+
8
+ require 'rake/extensiontask'
9
+
10
+ Rake::ExtensionTask.new('immutable_set') do |ext|
11
+ ext.lib_dir = 'lib/immutable_set'
12
+ end
13
+
14
+ desc 'Download relevant ruby/spec tests, adapt to ImmutableSet and its variants'
15
+ task :sync_ruby_spec do
16
+ require 'fileutils'
17
+
18
+ variants = {
19
+ 'ImmutableSet' => './spec/ruby-spec/library/immutable_set',
20
+ 'ImmutableSet::Pure' => './spec/ruby-spec/library/immutable_set_pure',
21
+ }
22
+ variants.each_value { |dir| FileUtils.rm_rf(dir) if File.exist?(dir) }
23
+
24
+ base = variants.first[1]
25
+ `svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{base}`
26
+
27
+ variants.each_value { |dir| FileUtils.copy_entry(base, dir) unless dir == base }
28
+
29
+ variants.each.with_index do |(class_name, dir), i|
30
+ Dir["#{dir}/**/*.rb"].each do |spec|
31
+ if spec =~ %r{/(add|append|case|clear|collect|delete|filter|flatten|
32
+ initialize|keep_if|map|merge|replace|reject|select|subtract)}x
33
+ File.delete(spec)
34
+ next
35
+ end
36
+
37
+ # `i` must be added to shared example names or they'll override each other
38
+ adapted_content =
39
+ File
40
+ .read(spec)
41
+ .gsub('SortedSet', class_name)
42
+ .gsub('sorted_set_', "sorted_set_#{i}_")
43
+ .gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |method|')
44
+ .gsub('@method', 'method')
45
+ .gsub(/be_(false|true)/, 'be \1')
46
+ .gsub('mock', 'double')
47
+
48
+ File.open(spec, 'w') { |f| f.puts adapted_content }
49
+ end
50
+ end
51
+ end
52
+
53
+ desc 'Run all IPS benchmarks'
54
+ task :benchmark do
55
+ Dir['./benchmarks/*.rb'].sort.each { |file| require file }
56
+ end
57
+
58
+ namespace :benchmark do
59
+ desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
60
+ task :write_to_file do
61
+ $store_comparison_results = {}
62
+
63
+ Rake.application[:benchmark].invoke
64
+
65
+ File.open('BENCHMARK.md', 'w') do |f|
66
+ f.puts "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}",
67
+ '',
68
+ 'Note: `stdlib` refers to `SortedSet` without the `rbtree` gem. '\
69
+ 'If the `rbtree` gem is present, `SortedSet` will [use it]'\
70
+ '(https://github.com/ruby/ruby/blob/b1a8c64/lib/set.rb#L709-L724)'\
71
+ ' and become even slower.',
72
+ ''
73
+
74
+ $store_comparison_results.each do |caption, result|
75
+ f.puts '```', caption, result.strip.gsub(/(same-ish).*$/, '\1').lines[1..-1], '```'
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ unless RUBY_PLATFORM =~ /java/
82
+ # recompile before benchmarking or running specs
83
+ task(:benchmark).enhance([:compile])
84
+ task(:spec).enhance([:compile])
85
+ end
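
The `./benchmarks/*.rb` files that the `:benchmark` task requires are not part of this diff. As a rough, hypothetical sketch of what one such file could look like (the labels, sizes and the use of `ImmutableSet.new` with an enumerable are assumptions, not the gem's actual benchmark code):

```ruby
# Hypothetical ./benchmarks/intersection.rb -- illustrative only.
require 'benchmark/ips'
require 'set'
require 'immutable_set'

items_a = (0...5_000_000).to_a
items_b = (2_500_000...7_500_000).to_a

gem_a    = ImmutableSet.new(items_a)
gem_b    = ImmutableSet.new(items_b)
stdlib_a = SortedSet.new(items_a)
stdlib_b = SortedSet.new(items_b)

Benchmark.ips do |x|
  x.report('gem')    { gem_a & gem_b }
  x.report('stdlib') { stdlib_a & stdlib_b }
  x.compare!
end

# The gem's real benchmark files presumably also record the comparison text in
# $store_comparison_results so that `rake benchmark:write_to_file` can dump it
# into BENCHMARK.md; that plumbing is omitted here.
```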
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'immutable_set'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ class S < SortedSet; end
14
+ class I < ImmutableSet; end
15
+ class P < ImmutableSet::Pure; end
16
+
17
+ require "irb"
18
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,7 @@
1
+ require 'mkmf'
2
+
3
+ $CFLAGS << ' -Wextra -Wno-unused-parameter -Wall -pedantic '
4
+
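+ # defines HAVE_STRUCT_ST_TABLE_ENTRIES on Ruby >= 2.4; without it, the C extension compiles to a no-op (see immutable_set.c)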
5
+ have_struct_member('struct st_table', 'entries')
6
+
7
+ create_makefile('immutable_set/immutable_set')
@@ -0,0 +1,445 @@
1
+ // This speeds up set methods that take an enum by iterating both in parallel.
2
+ // Simple sanity checks and casting the arg to set are done in Ruby beforehand.
3
+ // Internal hashes of the recipient and arg must have been created in order.
4
+
5
+ #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES
6
+ // the optional extension doesn't work for ruby < 2.4, skip defining module
7
+ void Init_immutable_set() {}
8
+ #else
9
+
10
+ #include "ruby.h"
11
+ #include "ruby/st.h"
12
+
13
+ enum iter_state {A_LT_B = -1, A_EQ_B = 0, A_GT_B = 1, EOF_A = -2, EOF_B = -3};
14
+ enum iter_action {ITER_ADVANCE_A, ITER_ADVANCE_B, ITER_ADVANCE_BOTH, ITER_END};
15
+
16
+ typedef enum iter_state(*compare_function)(VALUE, VALUE);
17
+ typedef enum iter_action(*comp_callback)(enum iter_state, VALUE*);
18
+ typedef enum iter_action(*proc_callback)(enum iter_state, VALUE*, VALUE, VALUE);
19
+
20
+ static enum iter_state
21
+ compare_fixnum_values(VALUE a, VALUE b) {
22
+ if (a < b) return A_LT_B;
23
+ if (a > b) return A_GT_B;
24
+ return A_EQ_B;
25
+ }
26
+
27
+ static enum iter_state
28
+ compare_any_values(VALUE a, VALUE b) {
29
+ return rb_cmpint(rb_funcallv(a, rb_intern("<=>"), 1, &b), a, b);
30
+ }
31
+
32
+ #ifndef STRING_P
33
+ # define STRING_P(s) (RB_TYPE_P((s), T_STRING) && CLASS_OF(s) == rb_cString)
34
+ #endif
35
+
36
+ static compare_function
37
+ optimal_compare_function(VALUE set_a, VALUE set_b) {
38
+ VALUE max_a, max_b;
39
+
40
+ max_a = rb_iv_get(set_a, "@max");
41
+ max_b = rb_iv_get(set_b, "@max");
42
+
43
+ if (FIXNUM_P(max_a) && FIXNUM_P(max_b)) return compare_fixnum_values;
44
+ if (STRING_P(max_a) && STRING_P(max_b)) return rb_str_cmp;
45
+ return compare_any_values;
46
+ }
47
+
48
+ struct LOC_st_stable_entry {
49
+ st_index_t hash;
50
+ st_data_t key;
51
+ st_data_t record;
52
+ };
53
+
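+ // Returns a pointer to the ordered entries of the set's internal @hash table
+ // and writes the member count to size_ptr (relies on the Ruby >= 2.4 st_table layout).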
54
+ static struct LOC_st_stable_entry*
55
+ set_entries_ptr(VALUE set, st_index_t* size_ptr) {
56
+ VALUE hash;
57
+
58
+ hash = rb_iv_get(set, "@hash");
59
+ *size_ptr = RHASH_SIZE(hash);
60
+
61
+ return (struct LOC_st_stable_entry*)RHASH_TBL(hash)->entries;
62
+ }
63
+
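+ // Walks the ordered entries of two sets in lockstep. The callback decides, from
+ // how the current members compare (or from an EOF marker), which side to advance
+ // next and when to stop, accumulating its result in memo.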
64
+ #define PARALLEL_ITERATE(...) \
65
+ st_index_t size_a, size_b, i, j; \
66
+ compare_function compare_func; \
67
+ enum iter_state state; \
68
+ struct LOC_st_stable_entry *entries_a, *entries_b; \
69
+ VALUE entry_a, entry_b; \
70
+ \
71
+ entries_a = set_entries_ptr(set_a, &size_a); \
72
+ entries_b = set_entries_ptr(set_b, &size_b); \
73
+ if (!size_a || !size_b) return memo; \
74
+ \
75
+ i = j = 0; \
76
+ entry_a = entries_a[i].key; \
77
+ entry_b = entries_b[j].key; \
78
+ compare_func = optimal_compare_function(set_a, set_b); \
79
+ \
80
+ for (;;) { \
81
+ state = (*compare_func)(entry_a, entry_b); \
82
+ \
83
+ eval_state: \
84
+ switch((*callback)(state, __VA_ARGS__)) { \
85
+ case ITER_ADVANCE_A: \
86
+ if (++i >= size_a) { state = EOF_A; goto eval_state; } \
87
+ entry_a = entries_a[i].key; \
88
+ continue; \
89
+ case ITER_ADVANCE_B: \
90
+ if (++j >= size_b) { state = EOF_B; goto eval_state; } \
91
+ entry_b = entries_b[j].key; \
92
+ continue; \
93
+ case ITER_ADVANCE_BOTH: \
94
+ if (++i >= size_a) { state = EOF_A; goto eval_state; } \
95
+ entry_a = entries_a[i].key; \
96
+ if (++j >= size_b) { state = EOF_B; goto eval_state; } \
97
+ entry_b = entries_b[j].key; \
98
+ continue; \
99
+ case ITER_END: \
100
+ return memo; \
101
+ } \
102
+ } \
103
+
104
+ static VALUE
105
+ parallel_compare(VALUE set_a, VALUE set_b, comp_callback callback, VALUE memo) {
106
+ PARALLEL_ITERATE(&memo);
107
+ }
108
+
109
+ static VALUE
110
+ parallel_process(VALUE set_a, VALUE set_b, proc_callback callback, VALUE memo) {
111
+ PARALLEL_ITERATE(&memo, entry_a, entry_b);
112
+ }
113
+
114
+ static enum iter_action
115
+ check_first_subset_of_second(enum iter_state state, VALUE* memo) {
116
+ switch(state) {
117
+ case A_LT_B: *memo = Qfalse; break; // entry_a not in set_b
118
+ case A_EQ_B: return ITER_ADVANCE_BOTH;
119
+ case A_GT_B: return ITER_ADVANCE_B;
120
+ case EOF_A: *memo = Qtrue; break; // checked all in set_a
121
+ case EOF_B: *memo = Qfalse; break; // no more comparandi in set_b
122
+ }
123
+ return ITER_END;
124
+ }
125
+
126
+ // Returns Qtrue if SET_A is a subset (proper or not) of SET_B, else Qfalse.
127
+ static VALUE
128
+ method_subset_p(VALUE self, VALUE set_a, VALUE set_b) {
129
+ return parallel_compare(set_a, set_b, check_first_subset_of_second, Qfalse);
130
+ }
131
+
132
+ // Returns Qtrue if SET_A is a superset (proper or not) of SET_B, else Qfalse.
133
+ static VALUE
134
+ method_superset_p(VALUE self, VALUE set_a, VALUE set_b) {
135
+ return parallel_compare(set_b, set_a, check_first_subset_of_second, Qfalse);
136
+ }
137
+
138
+ // TODO: if (a > b max || b > a max) *memo = Qfalse; break; ?
139
+ static enum iter_action
140
+ check_if_intersect(enum iter_state state, VALUE* memo) {
141
+ switch(state) {
142
+ case A_LT_B: return ITER_ADVANCE_A;
143
+ case A_EQ_B: *memo = Qtrue; break; // found common member
144
+ case A_GT_B: return ITER_ADVANCE_B;
145
+ case EOF_A: *memo = Qfalse; break;
146
+ case EOF_B: *memo = Qfalse; break;
147
+ }
148
+ return ITER_END;
149
+ }
150
+
151
+ // Returns Qtrue if SET_A intersects with SET_B, else Qfalse.
152
+ static VALUE
153
+ method_intersect_p(VALUE self, VALUE set_a, VALUE set_b) {
154
+ return parallel_compare(set_a, set_b, check_if_intersect, Qfalse);
155
+ }
156
+
157
+ static void
158
+ set_max_ivar_for_set(VALUE set) {
159
+ struct LOC_st_stable_entry *entries;
160
+ st_index_t size;
161
+
162
+ entries = set_entries_ptr(set, &size);
163
+ if (size) rb_iv_set(set, "@max", entries[size - 1].key);
164
+ }
165
+
166
+ #define MEMO_HASH (memo[0])
167
+ #define MEMO_SET_A_DEPLETED (memo[1])
168
+ #define MEMO_SET_B_DEPLETED (memo[2])
169
+
170
+ // helper to process two sets and build a new one in parallel
171
+ static VALUE
172
+ parallel_build(VALUE set_a, VALUE set_b, proc_callback proc) {
173
+ VALUE new_set, new_hash, memo[3];
174
+
175
+ // prepare new Set
176
+ new_set = rb_class_new_instance(0, 0, RBASIC(set_a)->klass);
177
+ new_hash = rb_hash_new();
178
+ rb_iv_set(new_set, "@hash", new_hash);
179
+
180
+ MEMO_HASH = new_hash;
181
+ MEMO_SET_A_DEPLETED = 0;
182
+ MEMO_SET_B_DEPLETED = 0;
183
+
184
+ parallel_process(set_a, set_b, proc, (VALUE)memo);
185
+
186
+ set_max_ivar_for_set(new_set);
187
+ rb_obj_freeze(new_hash);
188
+
189
+ return new_set;
190
+ }
191
+
192
+ static enum iter_action
193
+ add_shared_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
194
+ VALUE *memo;
195
+
196
+ switch(state) {
197
+ case A_LT_B: return ITER_ADVANCE_A;
198
+ case A_EQ_B:
199
+ memo = (VALUE*)*memp;
200
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
201
+ return ITER_ADVANCE_BOTH;
202
+ case A_GT_B: return ITER_ADVANCE_B;
203
+ case EOF_A: break;
204
+ case EOF_B: break;
205
+ }
206
+ return ITER_END;
207
+ }
208
+
209
+ // Returns a new set containing all members shared by SET_A and SET_B.
210
+ static VALUE
211
+ method_intersection(VALUE self, VALUE set_a, VALUE set_b) {
212
+ return parallel_build(set_a, set_b, add_shared_to_hash);
213
+ }
214
+
215
+ static enum iter_action
216
+ add_any_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
217
+ VALUE *memo = (VALUE*)*memp;
218
+
219
+ switch(state) {
220
+ case A_LT_B:
221
+ if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
222
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
223
+ return ITER_ADVANCE_B;
224
+ }
225
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
226
+ return ITER_ADVANCE_A;
227
+ case A_EQ_B:
228
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
229
+ return ITER_ADVANCE_BOTH; // shared member
230
+ case A_GT_B:
231
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
232
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
233
+ return ITER_ADVANCE_A;
234
+ }
235
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
236
+ return ITER_ADVANCE_B;
237
+ case EOF_A:
238
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
239
+ MEMO_SET_A_DEPLETED = 1;
240
+ if (MEMO_SET_B_DEPLETED) break; // break if both sets depleted
241
+ return ITER_ADVANCE_B;
242
+ case EOF_B:
243
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
244
+ MEMO_SET_B_DEPLETED = 1;
245
+ if (MEMO_SET_A_DEPLETED) break; // break if both sets depleted
246
+ return ITER_ADVANCE_A;
247
+ }
248
+ return ITER_END;
249
+ }
250
+
251
+ // Returns a new set that includes all members of SET_A and/or SET_B.
252
+ static VALUE
253
+ method_union(VALUE self, VALUE set_a, VALUE set_b) {
254
+ return parallel_build(set_a, set_b, add_any_members_to_hash);
255
+ }
256
+
257
+ #define INSERT_UNLESS_EQUAL(val, other, hsh) \
258
+ if (compare_any_values(val, other)) { st_insert(RHASH_TBL(hsh), val, Qtrue); }
259
+
260
+ static enum iter_action
261
+ add_nonb_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
262
+ VALUE *memo = (VALUE*)*memp;
263
+
264
+ switch(state) {
265
+ case A_LT_B:
266
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
267
+ return ITER_ADVANCE_A;
268
+ case A_EQ_B:
269
+ return ITER_ADVANCE_BOTH; // shared member
270
+ case A_GT_B:
271
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
272
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
273
+ return ITER_ADVANCE_A;
274
+ }
275
+ return ITER_ADVANCE_B;
276
+ case EOF_A:
277
+ // if set b is also depleted, add a unless equal to final b
278
+ if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); }
279
+ break;
280
+ case EOF_B:
281
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue); // the current a is > the final b, so it belongs in the difference
+ MEMO_SET_B_DEPLETED = 1;
282
+ return ITER_ADVANCE_A;
283
+ }
284
+ return ITER_END;
285
+ }
286
+
287
+ // Returns a new set with all members of SET_A that are not in SET_B.
288
+ static VALUE
289
+ method_difference(VALUE self, VALUE set_a, VALUE set_b) {
290
+ return parallel_build(set_a, set_b, add_nonb_members_to_hash);
291
+ }
292
+
293
+ static enum iter_action
294
+ add_xor_members_to_hash(enum iter_state state, VALUE* memp, VALUE a, VALUE b) {
295
+ VALUE *memo = (VALUE*)*memp;
296
+
297
+ switch(state) {
298
+ case A_LT_B:
299
+ if (MEMO_SET_A_DEPLETED) { // iterating through leftovers of set b
300
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
301
+ return ITER_ADVANCE_B;
302
+ }
303
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
304
+ return ITER_ADVANCE_A;
305
+ case A_EQ_B:
306
+ return ITER_ADVANCE_BOTH; // shared member, skip
307
+ case A_GT_B:
308
+ if (MEMO_SET_B_DEPLETED) { // iterating through leftovers of set a
309
+ st_insert(RHASH_TBL(MEMO_HASH), a, Qtrue);
310
+ return ITER_ADVANCE_A;
311
+ }
312
+ st_insert(RHASH_TBL(MEMO_HASH), b, Qtrue);
313
+ return ITER_ADVANCE_B;
314
+ case EOF_A:
315
+ // if set b is also depleted, add a unless equal to final b and break
316
+ if (MEMO_SET_B_DEPLETED) { INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); break; }
317
+ INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); // add b unless equal to final a
318
+ MEMO_SET_A_DEPLETED = 1; // mark set a as depleted
319
+ return ITER_ADVANCE_B;
320
+ case EOF_B:
321
+ // if set a is also depleted, add b unless equal to final a and break
322
+ if (MEMO_SET_A_DEPLETED) { INSERT_UNLESS_EQUAL(b, a, MEMO_HASH); break; }
323
+ INSERT_UNLESS_EQUAL(a, b, MEMO_HASH); // add a unless equal to final b
324
+ MEMO_SET_B_DEPLETED = 1; // mark set b as depleted
325
+ return ITER_ADVANCE_A;
326
+ }
327
+ return ITER_END;
328
+ }
329
+
330
+ // Returns a new set that is a XOR result of SET_A and SET_B.
331
+ static VALUE
332
+ method_exclusion(VALUE self, VALUE set_a, VALUE set_b) {
333
+ return parallel_build(set_a, set_b, add_xor_members_to_hash);
334
+ }
335
+
336
+ #define INCR_FIXNUM_ID(id) (id += 2)
337
+ #define DECR_FIXNUM_ID(id) (id -= 2)
338
+
339
+ #define GET_RANGE_FIXNUM_IDS(range, from_id, upto_id) \
340
+ int excl; \
341
+ if (!rb_range_values(range, &from_id, &upto_id, &excl)) { \
342
+ rb_raise(rb_eArgError, "Pass a Range"); \
343
+ } \
344
+ if (excl) DECR_FIXNUM_ID(upto_id); \
345
+ Check_Type(from_id, T_FIXNUM); \
346
+ Check_Type(upto_id, T_FIXNUM);
347
+
348
+ // Fills HASH with all Fixnums in RANGE.
349
+ static VALUE
350
+ method_fill_with_fixnums(VALUE self, VALUE hash, VALUE range) {
351
+ VALUE from_id, upto_id;
352
+ st_table *tbl;
353
+
354
+ GET_RANGE_FIXNUM_IDS(range, from_id, upto_id);
355
+ tbl = RHASH_TBL(hash);
356
+
357
+ while (from_id <= upto_id) {
358
+ st_insert(tbl, from_id, Qtrue);
359
+ INCR_FIXNUM_ID(from_id);
360
+ }
361
+
362
+ return upto_id;
363
+ }
364
+
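+ // When ucp_only is set, ids strictly between 0x1B000 and 0x1C000 are skipped;
+ // as Fixnum VALUEs (2n + 1) these encode the codepoints 0xD800..0xDFFF,
+ // i.e. the UTF-16 surrogate range.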
365
+ inline static void
366
+ insert_fixnum_id(st_table *tbl, VALUE id, int ucp_only) {
367
+ if (!ucp_only || id <= 0x1B000 || id >= 0x1C000) {
368
+ st_insert(tbl, id, Qtrue);
369
+ }
370
+ }
371
+
372
+ // Returns a new set with all Fixnums in RANGE that are not in SET.
373
+ static VALUE
374
+ method_invert_fixnum_set(VALUE self, VALUE set, VALUE range, VALUE ucp) {
375
+ VALUE fixnum_id, upto_id, new_hash, new_set, entry;
376
+ st_index_t size, i;
377
+ int ucp_only;
378
+ st_table *new_tbl;
379
+ struct LOC_st_stable_entry *entries;
380
+
381
+ GET_RANGE_FIXNUM_IDS(range, fixnum_id, upto_id);
382
+ ucp_only = ucp != Qfalse && ucp != Qnil && ucp != Qundef;
383
+
384
+ // get set members
385
+ entries = set_entries_ptr(set, &size);
386
+
387
+ // prepare new Set
388
+ new_set = rb_class_new_instance(0, 0, RBASIC(set)->klass);
389
+ new_hash = rb_hash_new();
390
+ new_tbl = RHASH_TBL(new_hash);
391
+ rb_iv_set(new_set, "@hash", new_hash);
392
+
393
+ if (size) {
394
+ i = 0;
395
+ entry = entries[i].key;
396
+
397
+ // here is the optimization: skipping unneeded comparisons with lower values
398
+ for (;;) {
399
+ if (fixnum_id == entry) {
400
+ // fixnum_id is in set, compare next fixnum with next set member
401
+ INCR_FIXNUM_ID(fixnum_id);
402
+ if (++i == size || fixnum_id > upto_id) break;
403
+ entry = entries[i].key; // read the next entry only after the bounds check
404
+ }
405
+ else if (fixnum_id < entry) {
406
+ // fixnum_id is not in set, include in inversion
407
+ insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
408
+ INCR_FIXNUM_ID(fixnum_id);
409
+ if (fixnum_id > upto_id) break;
410
+ }
411
+ else /* if (fixnum_id > entry) */ {
412
+ // gap; fixnum_id might be in set, check next set member
413
+ if (++i == size) break;
414
+ entry = entries[i].key; // read the next entry only after the bounds check
415
+ }
416
+ }
417
+ }
418
+
419
+ // include all fixnums beyond the range of the set
420
+ while (fixnum_id <= upto_id) {
421
+ insert_fixnum_id(new_tbl, fixnum_id, ucp_only);
422
+ INCR_FIXNUM_ID(fixnum_id);
423
+ }
424
+
425
+ set_max_ivar_for_set(new_set);
426
+ rb_obj_freeze(new_hash);
427
+
428
+ return new_set;
429
+ }
430
+
431
+ void Init_immutable_set() {
432
+ VALUE mod;
433
+ mod = rb_define_module("ImmutableSetExt");
434
+ rb_define_singleton_method(mod, "difference", method_difference, 2);
435
+ rb_define_singleton_method(mod, "exclusion", method_exclusion, 2);
436
+ rb_define_singleton_method(mod, "fill_with_fixnums", method_fill_with_fixnums, 2);
437
+ rb_define_singleton_method(mod, "intersect?", method_intersect_p, 2);
438
+ rb_define_singleton_method(mod, "intersection", method_intersection, 2);
439
+ rb_define_singleton_method(mod, "invert_fixnum_set", method_invert_fixnum_set, 3);
440
+ rb_define_singleton_method(mod, "subset?", method_subset_p, 2);
441
+ rb_define_singleton_method(mod, "superset?", method_superset_p, 2);
442
+ rb_define_singleton_method(mod, "union", method_union, 2);
443
+ }
444
+
445
+ #endif // end of #ifndef HAVE_STRUCT_ST_TABLE_ENTRIES ... #else ...
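
The gem's `lib/` Ruby files are not included in this diff. Going only by the extension's opening comment (sanity checks and casting happen in Ruby first) and the singleton methods it registers on `ImmutableSetExt`, a wrapper could look roughly like the following hypothetical sketch; it is not the gem's actual code:

```ruby
# Hypothetical glue code -- the method name and the short-circuit are assumptions.
def immutable_intersection(set, enum)
  other = enum.is_a?(ImmutableSet) ? enum : set.class.new(enum) # cast the arg to a set
  return set.class.new if set.distinct_bounds?(other)           # cheap #minmax short-circuit
  ImmutableSetExt.intersection(set, other)                      # ordered parallel iteration in C
end
```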