RubyGems - immutable_set - Versions diffs - 0.1.0 - Mend

immutable_set 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

checksums.yaml +7 -0
data/.gitignore +31 -0
data/.rspec +3 -0
data/.travis.yml +10 -0
data/BENCHMARK.md +131 -0
data/Gemfile +6 -0
data/LICENSE.txt +21 -0
data/README.md +83 -0
data/Rakefile +85 -0
data/bin/console +18 -0
data/bin/setup +8 -0
data/ext/immutable_set/extconf.rb +7 -0
data/ext/immutable_set/immutable_set.c +445 -0
data/immutable_set.gemspec +33 -0
data/lib/immutable_set.rb +50 -0
data/lib/immutable_set/builder_methods.rb +60 -0
data/lib/immutable_set/disable_mutating_methods.rb +12 -0
data/lib/immutable_set/inversion.rb +13 -0
data/lib/immutable_set/native_ext.rb +19 -0
data/lib/immutable_set/pure.rb +5 -0
data/lib/immutable_set/ruby_fallback.rb +148 -0
data/lib/immutable_set/stdlib_set_method_overrides.rb +155 -0
data/lib/immutable_set/version.rb +3 -0
metadata +137 -0

data/immutable_set.gemspec ADDED

@@ -0,0 +1,33 @@
+# Put the gem's own lib/ directory on the load path so that the version file
+# can be required while this specification is evaluated.
+lib_dir = File.expand_path('lib', __dir__)
+$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
+# NOTE(review): 'set' is presumably required first because the version file
+# reopens a Set subclass - confirm against lib/immutable_set/version.rb.
+require 'set'
+require 'immutable_set/version'
+Gem::Specification.new do |spec|
+  spec.name          = 'immutable_set'
+  spec.version       = ImmutableSet::VERSION
+  spec.authors       = ['Janosch Müller']
+  spec.email         = ['janosch84@gmail.com']
+  spec.summary       = "A faster, immutable replacement for Ruby's Set"
+  spec.homepage      = 'https://github.com/janosch-x/immutable_set'
+  spec.license       = 'MIT'
+  # Ship every file tracked by git, except benchmark/test/spec/feature files.
+  tracked_files = `git ls-files -z`.split("\x0")
+  spec.files         = tracked_files.reject { |path| path.match(%r{^(benchmarks|test|spec|features)/}) }
+  spec.executables   = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
+  spec.require_paths = ['lib']
+  # The C extension is only registered on platforms that are not Java-based.
+  spec.extensions = %w[ext/immutable_set/extconf.rb] unless RUBY_PLATFORM =~ /java/i
+  spec.required_ruby_version = '>= 2.0.0'
+  spec.add_development_dependency 'benchmark-ips', '~> 2.7'
+  spec.add_development_dependency 'bundler', '~> 1.16'
+  spec.add_development_dependency 'rake', '~> 10.0'
+  spec.add_development_dependency 'rake-compiler', '~> 1.0'
+  spec.add_development_dependency 'rspec', '~> 3.0'
+end

data/lib/immutable_set.rb ADDED

@@ -0,0 +1,50 @@
+require 'set'
+require 'immutable_set/builder_methods'
+require 'immutable_set/native_ext'
+require 'immutable_set/disable_mutating_methods'
+require 'immutable_set/inversion'
+require 'immutable_set/pure'
+require 'immutable_set/ruby_fallback'
+require 'immutable_set/stdlib_set_method_overrides'
+require 'immutable_set/version'
+# A Set subclass that keeps its members as keys of a frozen Hash (@hash) and
+# records its largest member (@max) at construction time.
+class ImmutableSet < Set
+  attr_reader :max
+  # Builds the set from +arg+, which may be nil (empty set), another
+  # ImmutableSet, a Range, or any object responding to #to_a.
+  #
+  # If a block is given, every member is replaced by the block's return value
+  # (as with Set.new). The mapping is applied *before* sorting, so that both
+  # the member order and @max describe the mapped values, not the raw input.
+  # The Range and ImmutableSet shortcuts are only taken without a block,
+  # because they would otherwise bypass the mapping.
+  #
+  # Raises ArgumentError if +arg+ is not nil and does not respond to #to_a.
+  def initialize(arg = nil)
+    @hash = Hash.new(false)
+    if arg.is_a?(ImmutableSet) && !block_given?
+      # The other set's Hash is frozen, so it can be shared as is.
+      @hash = arg.instance_variable_get(:@hash)
+      @max = arg.max
+    elsif arg.is_a?(Range) && !block_given?
+      self.class.send(:feed_range_to_hash, arg, @hash)
+      @max = arg.max
+    elsif arg.respond_to?(:to_a)
+      members = arg.to_a
+      members = members.map { |o| yield(o) } if block_given?
+      sorted_members = members.sort
+      sorted_members.each { |o| @hash[o] = true }
+      @max = sorted_members.last
+    elsif !arg.nil?
+      raise ArgumentError, 'value must be enumerable'
+    end
+    @hash.freeze
+  end
+  # Returns the first key of @hash (memoized), or nil if the set is empty.
+  def min
+    @min ||= (first_key, = @hash.first) && first_key
+  end
+  # Returns [min, max]; both are nil for an empty set.
+  def minmax
+    [min, max]
+  end
+  # Returns true if either set is empty or if the min..max spans of the two
+  # sets do not overlap, i.e. if the sets cannot share any member.
+  #
+  # Raises ArgumentError unless +other+ is an ImmutableSet.
+  def distinct_bounds?(other)
+    raise ArgumentError, 'pass an ImmutableSet' unless other.is_a?(ImmutableSet)
+    empty? || other.empty? || (min > other.max || max < other.min)
+  end
+end

data/lib/immutable_set/builder_methods.rb ADDED

@@ -0,0 +1,60 @@
+#
+# Builder methods that set @hash and @max.
+#
+class ImmutableSet < Set
+  class << self
+    # Returns an ImmutableSet.
+    #
+    # Its members will be ordered, irrespective of the order of passed Ranges.
+    def from_ranges(*ranges)
+      build_with_hash_and_max do |new_hash|
+        highest_max = nil
+        # Feed the ranges by ascending lower bound to keep the Hash ordered.
+        ranges.sort_by(&:min).each do |range|
+          feed_range_to_hash(range, new_hash)
+          highest_max = [highest_max || range.max, range.max].max
+        end
+        highest_max
+      end
+    end
+    # Returns an ImmutableSet.
+    #
+    # This method can be directly passed a Hash and a max value.
+    # It also yields the Hash (or a new Hash if none is given) to any
+    # given block, to allow filling it while it is already attached to the
+    # new set, which can offer performance benefits for large hashes.
+    # If a block is given and no max is passed as parameter, the block must
+    # return the new max. An explicitly passed max always takes precedence
+    # over the block's return value.
+    #
+    # Make sure to pass the *correct* max of the new Set, or things will break.
+    #
+    # Raises ArgumentError if the resulting max does not respond to #<=>.
+    def build_with_hash_and_max(hash = nil, max = nil)
+      hash ||= Hash.new(false)
+      set = new
+      set.instance_variable_set(:@hash, hash)
+      block_result = yield(hash) if block_given?
+      # Only fall back to the block's return value if no max was passed.
+      max = block_result if max.nil?
+      raise ArgumentError, 'pass a comparable max' unless max.respond_to?(:<=>)
+      hash.freeze
+      set.instance_variable_set(:@max, max)
+      set
+    end
+    # Returns an ImmutableSet.
+    #
+    # Used to cast Enumerables to ImmutableSet if needed for comparisons.
+    def cast(obj)
+      obj.is_a?(ImmutableSet) ? obj : new(obj)
+    end
+    private
+    # Adds every member of +range+ as a key of +hash+.
+    #
+    # The native extension's fill_with_fixnums is used when the extension is
+    # available and both range ends have an odd object_id.
+    # NOTE(review): the odd-object_id test is presumably a cheap check for
+    # Fixnums on MRI - confirm before relying on it on other Ruby engines.
+    def feed_range_to_hash(range, hash)
+      if native_ext && range.begin.object_id.odd? && range.end.object_id.odd?
+        native_ext.fill_with_fixnums(hash, range)
+      else
+        range.each { |o| hash[o] = true }
+      end
+    end
+  end
+end

data/lib/immutable_set/disable_mutating_methods.rb ADDED

@@ -0,0 +1,12 @@
+class ImmutableSet < Set
+  # Names of the methods that are not available on an ImmutableSet: a fixed
+  # list plus every inherited instance method whose name starts with "add" or
+  # "delete" or ends with "!". Frozen, so the list cannot be changed later.
+  DISABLED_METHODS = %i[<< clear clone dup keep_if merge replace reset subtract]
+                     .concat(instance_methods.grep(/^add|^delete|.!$/))
+                     .freeze
+  # Undefine those disabled methods that actually exist, so that calling them
+  # ends up in #method_missing below.
+  (DISABLED_METHODS & instance_methods).each { |method| undef_method(method) }
+  # Raises a NoMethodError with an explanatory message for disabled methods.
+  # Any other unknown method is passed on to the default handling via super.
+  def method_missing(method_name, *args, &block)
+    super unless DISABLED_METHODS.include?(method_name)
+    raise NoMethodError, "##{method_name} can't be called on an ImmutableSet, "\
+                         'only on a Set/SortedSet. Use #+, #-, #^, #& instead.'
+  end
+end

data/lib/immutable_set/inversion.rb ADDED

@@ -0,0 +1,13 @@
+class ImmutableSet < Set
+  # Returns an ImmutableSet.
+  #
+  # The result includes all members `from`..`upto` that are not in self.
+  # If `ucp_only` is true, invalid unicode codepoints are omitted.
+  #
+  # Both `from` and `upto` must be given; an ArgumentError is raised
+  # otherwise, because neither implementation can iterate from or up to nil.
+  def inversion(from: nil, upto: nil, ucp_only: false)
+    raise ArgumentError, 'pass from: and upto:' if from.nil? || upto.nil?
+    # The native extension is only used if it is available and both bounds
+    # have an odd object_id; everything else goes to the Ruby fallback.
+    if native_ext && from.object_id.odd? && upto.object_id.odd?
+      native_ext.invert_fixnum_set(self, from..upto, ucp_only)
+    else
+      RubyFallback.inversion(self, from..upto, ucp_only)
+    end
+  end
+end

data/lib/immutable_set/native_ext.rb ADDED

@@ -0,0 +1,19 @@
+class ImmutableSet < Set
+  # Try to load the compiled extension that sits next to this file. It counts
+  # as loaded only if requiring it succeeds and ImmutableSetExt is defined.
+  ext_loaded =
+    begin
+      require_relative './immutable_set'
+      Kernel.const_defined?(:ImmutableSetExt)
+    rescue LoadError
+      false
+    end
+  # ImmutableSet.native_ext returns the extension module, or nil without it.
+  if ext_loaded
+    def self.native_ext
+      ::ImmutableSetExt
+    end
+  else
+    def self.native_ext
+      nil
+    end
+  end
+  # Instance-level shortcut for whatever the set's class reports.
+  def native_ext
+    self.class.native_ext
+  end
+end

data/lib/immutable_set/pure.rb ADDED

@@ -0,0 +1,5 @@
+class ImmutableSet < Set
+  # An ImmutableSet subclass whose native_ext is always nil.
+  class Pure < ImmutableSet
+    def self.native_ext
+      nil
+    end
+  end
+end

data/lib/immutable_set/ruby_fallback.rb ADDED

@@ -0,0 +1,148 @@
+class ImmutableSet < Set
+  module RubyFallback
+    module_function
+    # Returns a new set (of set's class) holding every value between
+    # range.begin and range.end that is not a member of `set`.
+    # If `ucp_only` is true, values from 0xD800 to 0xDFFF are left out.
+    def inversion(set, range, ucp_only)
+      lower = range.begin
+      upper = range.end
+      set.class.build_with_hash_and_max do |inverted|
+        set_min, set_max = set.minmax
+        last_added = nil
+        # Adds a value to the result and remembers it as the max so far.
+        add = lambda do |value|
+          next if ucp_only && value >= 0xD800 && value <= 0xDFFF
+          inverted[value] = true
+          last_added = value
+        end
+        if set_max.nil?
+          # `set` is empty, so nothing needs to be looked up
+          lower.upto(upper) { |value| add.call(value) }
+          next last_added
+        end
+        members = set.instance_variable_get(:@hash)
+        current = lower
+        # below the set's lower bound: add without lookup
+        while current < set_min && current <= upper
+          add.call(current)
+          current = current.next
+        end
+        # within the set's bounds: add only what the set does not contain
+        while current <= set_max && current <= upper
+          add.call(current) unless members.key?(current)
+          current = current.next
+        end
+        # above the set's upper bound: add without lookup
+        while current <= upper
+          add.call(current)
+          current = current.next
+        end
+        last_added
+      end
+    end
+    # Returns a new set (of set_a's class) with the members of both sets.
+    #
+    # Empty operands are handled up front, because the bounds comparisons
+    # below cannot deal with the nil min/max of an empty set.
+    def union(set_a, set_b)
+      return set_a.class.new(set_b) if set_a.empty?
+      return set_a if set_b.empty?
+      a_min, a_max = set_a.minmax
+      b_min, b_max = set_b.minmax
+      a_hash = set_a.instance_variable_get(:@hash)
+      b_hash = set_b.instance_variable_get(:@hash)
+      # disjoint sets case (self wholly below b)
+      if a_max < b_min
+        hash = a_hash.dup.update(b_hash)
+        return set_a.class.build_with_hash_and_max(hash, b_max)
+      # disjoint sets case (b wholly below self)
+      elsif b_max < a_min
+        hash = b_hash.dup.update(a_hash)
+        return set_a.class.build_with_hash_and_max(hash, a_max)
+      end
+      # sets with overlapping bounds case - insert objects in order
+      set_a.class.build_with_hash_and_max do |new_hash|
+        a_keys = a_hash.keys
+        b_keys = b_hash.keys
+        a_key = a_keys[i = 0]
+        b_key = b_keys[j = 0]
+        # Merge step: always insert the smaller of the two current keys.
+        # Equal keys are inserted from b first; inserting the same key from a
+        # afterwards does not change the Hash.
+        while a_key && b_key
+          if a_key < b_key
+            new_hash[a_key] = true
+            a_key = a_keys[i += 1]
+          else
+            new_hash[b_key] = true
+            b_key = b_keys[j += 1]
+          end
+        end
+        # One side is exhausted - append whatever is left of the other side.
+        remaining_keys, offset = a_key ? [a_keys, i] : [b_keys, j]
+        remaining_size = remaining_keys.size
+        while offset < remaining_size
+          new_hash[remaining_keys[offset]] = true
+          offset += 1
+        end
+        [a_max, b_max].max
+      end
+    end
+    # Returns a new set (of set_a's class) with the members of set_a that are
+    # not in set_b. The last remaining key is passed on as the new max.
+    def difference(set_a, set_b)
+      remaining = set_a.instance_variable_get(:@hash).dup
+      set_b.each do |member|
+        remaining.delete(member)
+      end
+      new_max = remaining.keys.last
+      set_a.class.build_with_hash_and_max(remaining, new_max)
+    end
+    # Returns a new set (of set_a's class) with the members found in both
+    # sets.
+    #
+    # The max of the result is the last shared member that was inserted (nil
+    # if there is none). It is NOT simply the smaller of the two input maxima,
+    # as that value does not have to be a member of both sets.
+    def intersection(set_a, set_b)
+      set_a.class.build_with_hash_and_max do |new_hash|
+        a_keys = set_a.to_a
+        a_max  = set_a.max
+        b_keys = set_b.to_a
+        b_max  = set_b.max
+        a_key = a_keys[i = 0]
+        b_key = b_keys[j = 0]
+        new_max = nil
+        # Walk both key lists in parallel; stop as soon as one side is
+        # exhausted or has moved beyond the other side's max.
+        while a_key && b_key && a_key <= b_max && b_key <= a_max
+          if a_key == b_key
+            new_hash[a_key] = true
+            new_max = a_key
+            a_key = a_keys[i += 1]
+            b_key = b_keys[j += 1]
+          elsif a_key < b_key
+            a_key = a_keys[i += 1]
+          else # a_key > b_key
+            b_key = b_keys[j += 1]
+          end
+        end
+        new_max
+      end
+    end
+    # Returns true if the two sets share at least one member, else false.
+    # The members of the smaller set are looked up in the larger set.
+    def intersect?(set_a, set_b)
+      probe = lambda do |small, large|
+        return false if small.distinct_bounds?(large)
+        lower, upper = large.minmax
+        small.each do |member|
+          # below the larger set's bounds - no lookup needed
+          next if member < lower
+          # beyond the larger set's bounds - nothing can match anymore
+          break if member > upper
+          return true if large.include?(member)
+        end
+        false
+      end
+      if set_a.size < set_b.size
+        probe.call(set_a, set_b)
+      else
+        probe.call(set_b, set_a)
+      end
+    end
+    # Returns a new set (of set_a's class) with the members that are in
+    # exactly one of the two sets.
+    #
+    # The exclusive members of both sets are sorted before insertion, so that
+    # the resulting Hash is ordered and its last key really is the max, even
+    # if the bounds of the two sets overlap.
+    def exclusion(set_a, set_b)
+      set_a.class.build_with_hash_and_max do |new_hash|
+        only_in_a = set_a.to_a.reject { |o| set_b.include?(o) }
+        only_in_b = set_b.to_a.reject { |o| set_a.include?(o) }
+        exclusive_members = (only_in_a + only_in_b).sort
+        exclusive_members.each { |o| new_hash[o] = true }
+        exclusive_members.last
+      end
+    end
+  end
+end

data/lib/immutable_set/stdlib_set_method_overrides.rb ADDED

@@ -0,0 +1,155 @@
+class ImmutableSet < Set
+  #
+  # These comparison methods only offer a big speed gain with the C extension,
+  # or on Ruby < 2.3 where `Set` has no access to Hash#<=>.
+  #
+  # In Ruby, bad Enumerator#next performance makes using two of them in parallel
+  # slower than just looking up everything (as #super does) for many cases.
+  #
+  # Uses the native extension (after a quick bounds check) if it can relate
+  # the two sets, and the inherited implementation otherwise.
+  def superset?(set)
+    if native_ext_can_relate?(set)
+      potentially_superset_of?(set) && native_ext.superset?(self, set)
+    else
+      super
+    end
+  end
+  alias >= superset?
+  # Returns true if self contains all members of +set+ plus at least one more.
+  #
+  # Falls back to the inherited implementation if the native extension cannot
+  # relate the two sets. Otherwise self must be a superset with more members.
+  # The sizes are compared rather than the bounds, because a proper superset
+  # may well have the same min and max, e.g. {1, 2, 3} > {1, 3}.
+  def proper_superset?(set)
+    return super unless native_ext_can_relate?(set)
+    potentially_superset_of?(set) && size > set.size &&
+      native_ext.superset?(self, set)
+  end
+  alias > proper_superset?
+  # Uses the native extension (after a quick bounds check) if it can relate
+  # the two sets, and the inherited implementation otherwise.
+  def subset?(set)
+    if native_ext_can_relate?(set)
+      potentially_subset_of?(set) && native_ext.subset?(self, set)
+    else
+      super
+    end
+  end
+  alias <= subset?
+  # Returns true if all members of self are in +set+ and +set+ has at least
+  # one more member.
+  #
+  # Falls back to the inherited implementation if the native extension cannot
+  # relate the two sets. Otherwise self must be a subset with fewer members.
+  # The sizes are compared rather than the bounds, because a proper subset
+  # may well have the same min and max, e.g. {1, 3} < {1, 2, 3}.
+  def proper_subset?(set)
+    return super unless native_ext_can_relate?(set)
+    potentially_subset_of?(set) && size < set.size &&
+      native_ext.subset?(self, set)
+  end
+  alias < proper_subset?
+  #
+  # These methods are faster both with the C extension and the Ruby fallback.
+  #
+  # Returns an ImmutableSet with the members of self and +other+.
+  #
+  # +other+ is cast before it is asked whether it is empty, because plain
+  # Enumerables such as Range or Enumerator do not offer #empty?.
+  # If self is empty, the cast +other+ is the result; the union
+  # implementations are only invoked with two non-empty sets.
+  #
+  # Raises ArgumentError if +other+ does not respond to #each.
+  def |(other)
+    raise_unless_enumerable(other)
+    other = self.class.cast(other)
+    return self if other.empty?
+    return other if empty?
+    relate_with_method(:union, to_other: other)
+  end
+  alias + |
+  alias union |
+  # Returns an ImmutableSet with the members of self that are not in +other+.
+  #
+  # +other+ is cast before it is asked whether it is empty, because plain
+  # Enumerables such as Range or Enumerator do not offer #empty?.
+  # self is returned as is if nothing can be removed from it.
+  #
+  # Raises ArgumentError if +other+ does not respond to #each.
+  def -(other)
+    raise_unless_enumerable(other)
+    other = self.class.cast(other)
+    return self if other.empty?
+    return self if distinct_bounds?(other)
+    relate_with_method(:difference, to_other: other)
+  end
+  alias difference -
+  # Returns an ImmutableSet with the members that are in both self and +other+.
+  #
+  # +other+ is cast before it is asked whether it is empty, because plain
+  # Enumerables such as Range or Enumerator do not offer #empty?.
+  # An empty set is returned right away if the sets cannot share any member.
+  #
+  # Raises ArgumentError if +other+ does not respond to #each.
+  def &(other)
+    raise_unless_enumerable(other)
+    other = self.class.cast(other)
+    return self.class.new if other.empty?
+    return self.class.new if distinct_bounds?(other)
+    relate_with_method(:intersection, to_other: other)
+  end
+  alias intersection &
+  # Returns an ImmutableSet with the members that are in exactly one of self
+  # and +other+.
+  #
+  # +other+ is cast first, so that the result is an ImmutableSet even if self
+  # is empty, and so that plain Enumerables without #empty? (e.g. Range or
+  # Enumerator) are supported.
+  #
+  # Raises ArgumentError if +other+ does not respond to #each.
+  def ^(other)
+    raise_unless_enumerable(other)
+    other = self.class.cast(other)
+    return other if empty?
+    return self if other.empty?
+    # Sets that cannot share members: the exclusion equals the union.
+    return self + other if distinct_bounds?(other)
+    relate_with_method(:exclusion, to_other: other)
+  end
+  # Set#intersect? at ~ O(m*n) *can* surpass ImmutableSet#intersect? at ~ O(m+n)
+  # for sets with *very* different sizes and unfortunately offset members.
+  # Example: Set[999_999].intersect?(Set.new(1..1_000_000))
+  STD_INTERSECT_THRESHOLD_RATIO = 1000
+  # Returns true if self and +other+ share at least one member.
+  #
+  # +other+ is cast before it is asked whether it is empty, because plain
+  # Enumerables such as Range or Enumerator do not offer #empty?.
+  # The inherited implementation is used if the size ratio of the two sets
+  # exceeds STD_INTERSECT_THRESHOLD_RATIO.
+  #
+  # Raises ArgumentError if +other+ does not respond to #each.
+  def intersect?(other)
+    raise_unless_enumerable(other)
+    other = self.class.cast(other)
+    return false if empty? || other.empty?
+    return false if distinct_bounds?(other)
+    # Neither set is empty at this point, so the division is safe.
+    smaller_size, larger_size = [size, other.size].minmax
+    return super if larger_size / smaller_size > STD_INTERSECT_THRESHOLD_RATIO
+    relate_with_method(:intersect?, to_other: other)
+  end
+  # Returns a Hash that maps each distinct return value of the block to an
+  # ImmutableSet of the members for which the block returned that value.
+  # Without a block, the inherited implementation is used.
+  #
+  # The result is assembled with #each_with_object instead of Array#to_h, as
+  # the latter is not available on all Ruby versions this gem declares to
+  # support.
+  def classify
+    return super unless block_given?
+    classification_hash = {}
+    each do |o|
+      tmp = (classification_hash[yield(o)] ||= { data: {}, max: nil })
+      tmp[:data][o] = true
+      # Every member overwrites :max, so the one visited last ends up there.
+      tmp[:max] = o
+    end
+    classification_hash.each_with_object({}) do |(k, v), result|
+      result[k] = self.class.build_with_hash_and_max(v[:data], v[:max])
+    end
+  end
+  #
+  # The following private helper methods do not exist in the stdlib.
+  #
+  private
+  # Raises ArgumentError unless +obj+ responds to #each.
+  def raise_unless_enumerable(obj)
+    return if obj.respond_to?(:each)
+    raise ArgumentError, 'value must be enumerable'
+  end
+  # Calls +method+ with (self, to_other) on the module that is responsible
+  # for relating self to +to_other+, and returns the result.
+  def relate_with_method(method, to_other: nil)
+    implementation = relate_module(to_other)
+    implementation.__send__(method, self, to_other)
+  end
+  # Returns the native extension if it can relate self to +other+, and the
+  # RubyFallback module otherwise.
+  def relate_module(other)
+    return native_ext if native_ext_can_relate?(other)
+    RubyFallback
+  end
+  # The C extension can relate two sets if it is loaded, the other set is also
+  # an ImmutableSet, neither is empty, and members are comparable between sets.
+  #
+  # The result is only meant to be used as truthy/falsy: the last operand is
+  # the return value of #<=>, which is nil (falsy) if the two maxima cannot be
+  # compared - as is the case for the nil max of an empty other set - and an
+  # Integer (truthy, even when it is 0) if they can.
+  def native_ext_can_relate?(other)
+    native_ext && other.is_a?(ImmutableSet) && max && (max <=> other.max)
+  end
+  #
+  # These are some very fast sanity checks that can improve clear-cut cases.
+  # e.g.: a set with shorter bounds (at any end) can never be a superset.
+  # This brings huge improvements on Ruby < 2.3 (Rubies without Hash#<=>).
+  #
+  # True if the bounds of self lie within the bounds of +other+.
+  def potentially_subset_of?(other)
+    lower_bound_fits = min >= other.min
+    lower_bound_fits && max <= other.max
+  end
+  # True if self may be a proper subset of +other+: the bounds of self lie
+  # within those of +other+ and self has fewer members. The sizes are compared
+  # rather than the bounds, because a proper subset may well have the same
+  # min and max, e.g. {1, 3} and {1, 2, 3}.
+  def potentially_proper_subset_of?(other)
+    potentially_subset_of?(other) && size < other.size
+  end
+  # True if the bounds of +other+ lie within the bounds of self.
+  def potentially_superset_of?(other)
+    lower_bound_fits = min <= other.min
+    lower_bound_fits && max >= other.max
+  end
+  # True if self may be a proper superset of +other+: the bounds of +other+
+  # lie within those of self and self has more members. The sizes are compared
+  # rather than the bounds, because a proper superset may well have the same
+  # min and max, e.g. {1, 2, 3} and {1, 3}.
+  def potentially_proper_superset_of?(other)
+    potentially_superset_of?(other) && size > other.size
+  end
+end