RubyGems - js_regex - Versions diffs - 1.0.19 → 1.1.0 - Mend

js_regex 1.0.19 → 1.1.0

Files changed (23) hide show

checksums.yaml +4 -4
data/lib/js_regex/conversion.rb +24 -22
data/lib/js_regex/converter/anchor_converter.rb +2 -0
data/lib/js_regex/converter/assertion_converter.rb +4 -2
data/lib/js_regex/converter/backreference_converter.rb +2 -0
data/lib/js_regex/converter/base.rb +3 -1
data/lib/js_regex/converter/conditional_converter.rb +3 -1
data/lib/js_regex/converter/context.rb +81 -19
data/lib/js_regex/converter/escape_converter.rb +4 -4
data/lib/js_regex/converter/freespace_converter.rb +2 -0
data/lib/js_regex/converter/group_converter.rb +24 -32
data/lib/js_regex/converter/literal_converter.rb +30 -16
data/lib/js_regex/converter/meta_converter.rb +3 -2
data/lib/js_regex/converter/nonproperty_converter.rb +8 -1
data/lib/js_regex/converter/property_converter.rb +17 -12
data/lib/js_regex/converter/quantifier_converter.rb +7 -5
data/lib/js_regex/converter/set_converter.rb +41 -36
data/lib/js_regex/converter/type_converter.rb +5 -3
data/lib/js_regex/converter/unsupported_token_converter.rb +2 -0
data/lib/js_regex/property_map.rb +5 -2
data/lib/js_regex/version.rb +5 -0
data/lib/js_regex.rb +1 -0
metadata +35 -14

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 2429773ea6bbeb5d7eed4ffa01742f0cecc4df77
-  data.tar.gz: b5b3d8380986a844cf8577cf76a55ec846fc8eb6
+  metadata.gz: 1eb9a34e224340fb10bcaac25b7adee037279a2e
+  data.tar.gz: a50cc191b462e501e194308f9eb06c893b177657
 SHA512:
-  metadata.gz: 2728a8ceb9ee272aabd1d30b6bd2c16154abc104f1c7cd3f2da7bc357582773c6d68afea5cc3a5844cae49bf7090ec3da6eacca86252b0543406533120b264fe
-  data.tar.gz: 25d2e9dd9fbeda9cf624200565bca5e6603fd082549ecc0dab790fa3fb979eb03cab4105c15156123569dad079ac3ca31afe76e532a29d0f0161b60e8d95bc2f
+  metadata.gz: d7156fa441d772630f4d1f947e029f11355c74c85a563cf8b5ed09f977930e238c0e7951856da0163d8c255620ae3a64f225d50b5d77f77fbe60025052929f46
+  data.tar.gz: 72bb8e367bc70bef958957ecc8444af459e26e4aa4111fcae10d6bd80fb863120161c13a5beedb9210a700fcfa0896004b4f06605a54bebe96dd32a4831ab7f6

data/lib/js_regex/conversion.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class JsRegex
   #
   # This class acts as a facade, creating specific Converters and
@@ -12,17 +14,17 @@ class JsRegex
     attr_reader :ruby_regex, :context, :converters, :source, :options, :warnings
     def initialize(ruby_regex)
-      @ruby_regex = ruby_regex
+      self.ruby_regex = ruby_regex
-      @context    = Converter::Context.new
-      @converters = {}
+      self.context    = Converter::Context.new
+      self.converters = {}
-      @source     = ''
-      @options    = ''
-      @warnings   = []
+      self.source     = ''.dup
+      self.options    = ''.dup
+      self.warnings   = []
-      convert_source(ruby_regex)
-      convert_options(ruby_regex)
+      convert_source
+      convert_options
       perform_sanity_check
     end
@@ -33,6 +35,8 @@ class JsRegex
     private
+    attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
     CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
       anchor:      Converter::AnchorConverter,
       assertion:   Converter::AssertionConverter,
@@ -51,36 +55,34 @@ class JsRegex
       type:        Converter::TypeConverter
     ).freeze
-    def convert_source(ruby_regex)
+    def convert_source
       Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
         # There might be a lot of tokens, so don't wrap their data in objects.
         # Even just wrapping them in simple structs or attr_reader objects
         # can lead to 60%+ longer processing times for large regexes.
-        convert_token(token_class, subtype, data, s, e)
+        converter_for_token_class(token_class)
+          .convert(token_class, subtype, data, s, e)
       end
-      converters.clear
-    end
-    def convert_token(token_class, subtype, data, s, e)
-      converter = converter_for_token_class(token_class)
-      converter.convert(token_class, subtype, data, s, e)
     end
     def converter_for_token_class(token_class)
       converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
     end
-    def convert_options(ruby_regex)
-      @options = 'g' # all Ruby regexes are what is called "global" in JS
-      @options << 'i' if ruby_regex.options & Regexp::IGNORECASE > 0
+    def convert_options
+      options << 'g' # all Ruby regexes are what is called "global" in JS
+      options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
     end
+    SURROGATE_CODEPOINT_PATTERN = /\\uD[89A-F]\h\h/i
     def perform_sanity_check
       # Ruby regex capabilities are a superset of JS regex capabilities in
-      # the source part. So if this raises an Error, a Converter messed up:
-      Regexp.new(source, options)
+      # the source part. So if this raises an Error, a Converter messed up.
+      # Ignore that Ruby won't accept surrogate pairs, though.
+      Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
     rescue ArgumentError, RegexpError, SyntaxError => e
-      @source = ''
+      self.source = ''
       warnings << e.message
     end
   end

data/lib/js_regex/converter/anchor_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex

data/lib/js_regex/converter/assertion_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 require_relative 'group_converter'
@@ -14,9 +16,9 @@ class JsRegex
       def convert_data
         case subtype
         when :lookahead, :nlookahead
-          open_group(non_capturing: true)
+          open_group(capturing: false)
         when :nlookbehind
-          context.negative_lookbehind = true
+          context.start_negative_lookbehind
           warn_of_unsupported_feature('negative lookbehind assertion')
         else # :lookbehind, ...
           open_unsupported_group

data/lib/js_regex/converter/backreference_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex

data/lib/js_regex/converter/base.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class JsRegex
   module Converter
     #
@@ -31,7 +33,7 @@ class JsRegex
       def warn_of_unsupported_feature(description = nil)
         description ||= "#{subtype} #{token_class} '#{data}'".tr('_', ' ')
         target.warnings << "Dropped unsupported #{description} "\
-                           "at index #{start_index}..#{end_index}"
+                           "at index #{start_index}...#{end_index}"
         ''
       end
     end

data/lib/js_regex/converter/conditional_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex
@@ -11,7 +13,7 @@ class JsRegex
       def convert_data
         case subtype
         when :open
-          warn_of_unsupported_feature("conditional '(?'")
+          warn_of_unsupported_feature("conditional '(?('")
           '('
         when :separator, :close
           pass_through

data/lib/js_regex/converter/context.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class JsRegex
   module Converter
     #
@@ -6,25 +8,20 @@ class JsRegex
     # The Converters themselves are stateless.
     #
     class Context
-      attr_accessor :buffered_set_extractions,
-                    :buffered_set_members,
-                    :captured_group_count,
-                    :group_count_changed,
-                    :group_level,
-                    :group_level_for_backreference,
-                    :negative_lookbehind,
-                    :negative_set_levels,
-                    :previous_quantifier_end,
-                    :previous_quantifier_subtype,
-                    :set_level
+      attr_accessor :previous_quantifier_end # , :previous_quantifier_type
+      attr_reader :buffered_set_extractions,
+                  :buffered_set_members,
+                  :captured_group_count,
+                  :group_count_changed,
+                  :group_level_for_backreference,
+                  :negative_lookbehind
       def initialize
         self.buffered_set_members = []
         self.buffered_set_extractions = []
         self.captured_group_count = 0
-        self.group_count_changed = false
         self.group_level = 0
-        self.negative_lookbehind = false
         self.negative_set_levels = []
         self.set_level = 0
       end
@@ -36,16 +33,24 @@ class JsRegex
       # set context
       def open_set
-        self.set_level += 1
+        self.set_level = set_level + 1
         if set_level == 1
           buffered_set_members.clear
           buffered_set_extractions.clear
         end
-        self.negative_set_levels -= [set_level]
+        negative_set_levels.delete(set_level)
       end
       def negate_set
-        self.negative_set_levels |= [set_level]
+        self.negative_set_levels = negative_set_levels | [set_level]
+      end
+      def close_set
+        self.set_level = set_level - 1
+      end
+      def set?
+        set_level > 0
       end
       def negative_set?(level = set_level)
@@ -53,12 +58,69 @@ class JsRegex
       end
       def nested_negation?
-        set_level > 1 && negative_set?
+        nested_set? && negative_set?
       end
-      def close_set
-        self.set_level -= 1
+      def nested_set?
+        set_level > 1
+      end
+      # group context
+      def open_group
+        self.group_level = group_level + 1
+      end
+      def capture_group
+        self.captured_group_count = captured_group_count + 1
+      end
+      def start_atomic_group
+        self.group_level_for_backreference = group_level
+      end
+      def start_negative_lookbehind
+        self.negative_lookbehind = true
+      end
+      def close_group
+        self.group_level = group_level - 1
+      end
+      def close_atomic_group
+        close_group
+        self.group_level_for_backreference = nil
+        self.group_count_changed = true
       end
+      def close_negative_lookbehind
+        close_group
+        self.negative_lookbehind = false
+      end
+      def group?
+        group_level > 0
+      end
+      def atomic_group?
+        group_level_for_backreference
+      end
+      def base_level_of_atomic_group?
+        group_level_for_backreference &&
+          group_level.equal?(group_level_for_backreference + 1)
+      end
+      private
+      attr_accessor :group_level, :negative_set_levels, :set_level
+      attr_writer :buffered_set_extractions,
+                  :buffered_set_members,
+                  :captured_group_count,
+                  :group_count_changed,
+                  :group_level_for_backreference,
+                  :negative_lookbehind
     end
   end
 end

data/lib/js_regex/converter/escape_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 require_relative 'literal_converter'
@@ -24,19 +26,17 @@ class JsRegex
              :newline,
              :octal,
              :one_or_more,
-             :return,
              :set_close,
              :set_open,
-             :space,
              :tab,
              :vertical_tab,
              :zero_or_more,
              :zero_or_one
           pass_through
         when :literal
-          LiteralConverter.convert(data, self)
+          LiteralConverter.convert_data(data)
         else
-          # Backspace, Bell, HexWide, Control, Meta, MetaControl, ...
+          # Bell, Escape, HexWide, Control, Meta, MetaControl, ...
           warn_of_unsupported_feature
         end
       end

data/lib/js_regex/converter/freespace_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex

data/lib/js_regex/converter/group_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex
@@ -13,7 +15,7 @@ class JsRegex
         when :atomic then open_atomic_group
         when :capture then open_group
         when :close then close_group
-        when :comment then '' # drop whole group w/o warning
+        when :comment then '' # drop whole group without warning
         when :named_ab, :named_sq then open_named_group
         when :options then open_options_group
         when :passive then open_passive_group
@@ -25,12 +27,16 @@ class JsRegex
         # Atomicity is emulated using backreferenced lookahead groups:
         # http://instanceof.me/post/52245507631
         # regex-emulate-atomic-grouping-with-lookahead
-        context.group_level_for_backreference = context.group_level
-        open_group(head: '(?=(')
+        if context.atomic_group?
+          open_unsupported_group('nested atomic group')
+        else
+          context.start_atomic_group
+          open_group(head: '(?=(')
+        end
       end
       def open_named_group
-        # drop name w/o warning
+        # drop name without warning
         open_group(head: '(')
       end
@@ -40,47 +46,33 @@ class JsRegex
       end
       def open_passive_group
-        open_group(head: '(?:', non_capturing: true)
+        open_group(head: '(?:', capturing: false)
       end
-      def open_unsupported_group
-        warn_of_unsupported_feature
+      def open_unsupported_group(description = nil)
+        warn_of_unsupported_feature(description)
         open_passive_group
       end
-      def open_group(options = {})
-        context.group_level += 1
-        context.captured_group_count += 1 unless options[:non_capturing]
-        options[:head] || pass_through
+      def open_group(opts = {})
+        context.open_group
+        context.capture_group unless opts[:capturing].equal?(false)
+        opts[:head] || pass_through
       end
       def close_group
-        context.group_level -= 1
         if context.negative_lookbehind
-          close_negative_lookbehind
-        elsif end_of_atomic_group?
-          close_atomic_group
+          context.close_negative_lookbehind
+          ''
+        elsif context.base_level_of_atomic_group?
+          context.close_atomic_group
+          # an empty passive group (?:) is appended as literal digits may follow
+          "))\\#{context.captured_group_count}(?:)"
         else
+          context.close_group
           ')'
         end
       end
-      def close_negative_lookbehind
-        context.negative_lookbehind = false
-        ''
-      end
-      def end_of_atomic_group?
-        return false unless context.group_level_for_backreference
-        context.group_level_for_backreference == context.group_level
-      end
-      def close_atomic_group
-        context.group_level_for_backreference = nil
-        context.group_count_changed = true
-        # the empty passive group (?:) is appended in case literal digits follow
-        "))\\#{context.captured_group_count}(?:)"
-      end
     end
   end
 end

data/lib/js_regex/converter/literal_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex
@@ -6,30 +8,42 @@ class JsRegex
     # Template class implementation.
     #
     class LiteralConverter < JsRegex::Converter::Base
-      def self.convert(data, converter)
-        utf8_data = data.dup.force_encoding('UTF-8')
-        if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
-          converter.send(:warn_of_unsupported_feature, 'astral plane character')
-        else
-          escape_literal_forward_slashes(utf8_data)
-          ensure_json_compatibility(utf8_data)
-          utf8_data
+      class << self
+        ASTRAL_PLANE_CODEPOINT_PATTERN = /\A[\u{10000}-\u{FFFFF}]\z/
+        def convert_data(data)
+          if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
+            surrogate_pair_for(data)
+          else
+            escape_literal_forward_slashes(data)
+            ensure_json_compatibility(data)
+            data
+          end
         end
-      end
-      def self.escape_literal_forward_slashes(data)
-        # literal slashes would be mistaken for the pattern end in JsRegex#to_s
-        data.gsub!('/', '\\/')
-      end
+        private
-      def self.ensure_json_compatibility(data)
-        data.gsub!(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
+        def surrogate_pair_for(astral_char)
+          base = astral_char.codepoints.first - 65_536
+          high = ((base / 1024).floor + 55_296).to_s(16)
+          low  = (base % 1024 + 56_320).to_s(16)
+          "\\u#{high}\\u#{low}"
+        end
+        def escape_literal_forward_slashes(data)
+          # literal slashes would signify the pattern end in JsRegex#to_s
+          data.gsub!('/', '\\/')
+        end
+        def ensure_json_compatibility(data)
+          data.gsub!(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
+        end
       end
       private
       def convert_data
-        self.class.convert(data, self)
+        self.class.convert_data(data)
       end
     end
   end

data/lib/js_regex/converter/meta_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex
@@ -20,8 +22,7 @@ class JsRegex
       end
       def ruby_multiline_mode?
-        return false if @rb_mm == false
-        @rb_mm ||= target.ruby_regex.options & Regexp::MULTILINE > 0
+        (target.ruby_regex.options & Regexp::MULTILINE).nonzero?
       end
     end
   end

data/lib/js_regex/converter/nonproperty_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 require_relative 'property_converter'
@@ -12,7 +14,12 @@ class JsRegex
       private
       def convert_data
-        convert_property(true)
+        if context.set?
+          context.buffered_set_extractions << convert_property(true)
+          ''
+        else
+          convert_property(true)
+        end
       end
     end
   end

data/lib/js_regex/converter/property_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 require_relative File.join('..', 'property_map')
@@ -7,18 +9,21 @@ class JsRegex
     # Template class implementation.
     #
     class PropertyConverter < JsRegex::Converter::Base
-      def self.property_replacement(property_name, negated = false)
-        replacement = JsRegex::PROPERTY_MAP[property_name.downcase.to_sym]
-        negated ? negated_property_replacement(replacement) : replacement
-      end
+      class << self
+        def property_replacement(property_name, negated = nil)
+          replacement = PROPERTY_MAP[property_name.downcase.to_sym]
+          negated ? negated_property_replacement(replacement) : replacement
+        end
+        private
-      def self.negated_property_replacement(property_string)
-        # take care not to use destructive methods on elements in the map
-        return nil unless property_string
-        if property_string.start_with?('[^')
-          property_string.sub('[^', '[')
-        else
-          property_string.sub('[', '[^')
+        def negated_property_replacement(property_string)
+          return nil unless property_string
+          if property_string.start_with?('[^')
+            property_string.sub('[^', '[')
+          else
+            property_string.sub('[', '[^')
+          end
         end
       end
@@ -28,7 +33,7 @@ class JsRegex
         convert_property
       end
-      def convert_property(negated = false)
+      def convert_property(negated = nil)
         replace = self.class.property_replacement(subtype, negated)
         replace || warn_of_unsupported_feature
       end

data/lib/js_regex/converter/quantifier_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex
@@ -10,9 +12,9 @@ class JsRegex
       def convert_data
         if multiplicative_interval?
-          warn_of_unsupported_feature('multiplicative interval \'{x}{x}\'')
+          warn_of_unsupported_feature('adjacent quantifiers')
         else
-          context.previous_quantifier_subtype = subtype
+          # context.previous_quantifier_type = subtype
           context.previous_quantifier_end = end_index
           convert_quantifier
         end
@@ -28,9 +30,9 @@ class JsRegex
       end
       def multiplicative_interval?
-        subtype == :interval &&
-          context.previous_quantifier_subtype == :interval &&
-          context.previous_quantifier_end == start_index
+        # subtype == :interval &&
+        #  context.previous_quantifier_type == :interval &&
+        context.previous_quantifier_end.equal?(start_index)
       end
     end
   end

data/lib/js_regex/converter/set_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 require_relative 'literal_converter'
 require_relative 'property_converter'
@@ -26,12 +28,14 @@ class JsRegex
         when :member, :range, :escape then convert_member_subtype
         when /\Aclass_/ then convert_class_subtype
         when /\Atype_/ then convert_type_subtype
+        when :backspace then convert_backspace_subtype
         when :intersection
           warn_of_unsupported_feature("set intersection '&&'")
         else
-          # TODO: I think it's a bug in Regexp::Scanner that some property
-          # tokens (only positive ones?) are returned with the token class :set
-          # within sets. If this's fixed, just warn_of_unsupported_feature here.
+          # Note that, within sets, Regexp::Scanner returns
+          # - positive property tokens in the \p{-style with class :set
+          # - negative property tokens in the \P{-style with class :set
+          # - negative property tokens in the \p{^-style with class :nonproperty
           try_replacing_potential_property_subtype
         end
       end
@@ -42,7 +46,7 @@ class JsRegex
       end
       def convert_negate_subtype
-        if context.set_level > 1
+        if context.nested_set?
           warn_of_unsupported_feature('nested negative set data')
         end
         context.negate_set
@@ -51,25 +55,28 @@ class JsRegex
       def convert_close_subtype
         context.close_set
-        context.set_level == 0 ? finalize_set : ''
+        context.set? ? '' : finalize_set
       end
       def convert_member_subtype
-        literal_conversion = LiteralConverter.convert(data, self)
-        return '' if literal_conversion == ''
-        buffer_set_member(literal_conversion)
+        utf8_data = data.force_encoding('UTF-8')
+        if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
+          warn_of_unsupported_feature('astral plane set member')
+        else
+          literal_conversion = LiteralConverter.convert_data(utf8_data)
+          buffer_set_member(literal_conversion)
+        end
       end
       def convert_class_subtype
         negated = subtype.to_s.start_with?('class_non')
-        name = subtype.to_s[(negated ? 9 : 6)..-1]
+        name = subtype[(negated ? 9 : 6)..-1]
         try_replacing_property(name, negated)
       end
       def try_replacing_potential_property_subtype
-        negated = subtype.to_s.start_with?('non')
-        name = negated ? subtype.to_s[3..-1] : subtype.to_s
-        try_replacing_property(name, negated)
+        negated = data.start_with?('\\P')
+        try_replacing_property(subtype, negated)
       end
       def try_replacing_property(name, negated)
@@ -82,47 +89,40 @@ class JsRegex
       end
       def convert_type_subtype
-        if subtype == :type_hex
+        if subtype.equal?(:type_hex)
           buffer_set_extraction(TypeConverter::HEX_EXPANSION)
-        elsif subtype == :type_nonhex
+        elsif subtype.equal?(:type_nonhex)
           buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
         else
           buffer_set_member(data)
         end
       end
-      def buffer_set_member(string)
-        buffered_members << string unless context.nested_negation?
-        ''
+      def convert_backspace_subtype
+        buffer_set_extraction('[\b]')
       end
-      def buffer_set_extraction(string)
-        buffered_extractions << string unless context.nested_negation?
+      def buffer_set_member(m)
+        context.buffered_set_members << m unless context.nested_negation?
         ''
       end
-      def buffered_members
-        context.buffered_set_members
-      end
-      def buffered_extractions
-        context.buffered_set_extractions
+      def buffer_set_extraction(e)
+        context.buffered_set_extractions << e unless context.nested_negation?
+        ''
       end
       def finalize_set
-        if buffered_members.none?
-          finalize_depleted_set
+        buffered_members     = context.buffered_set_members
+        buffered_extractions = context.buffered_set_extractions
+        if buffered_members.empty?
+          finalize_depleted_set(buffered_extractions)
         else
-          set = build_set(buffered_members, context.negative_set?(1))
-          if buffered_extractions.any?
-            "(?:#{set}|#{buffered_extractions.join('|')})"
-          else
-            set
-          end
+          finalize_nondepleted_set(buffered_members, buffered_extractions)
         end
       end
-      def finalize_depleted_set
+      def finalize_depleted_set(buffered_extractions)
         case buffered_extractions.count
         when 0 then ''
         when 1 then buffered_extractions.first
@@ -130,8 +130,13 @@ class JsRegex
         end
       end
-      def build_set(members, negative)
-        "[#{negative ? '^' : ''}#{members.join}]"
+      def finalize_nondepleted_set(buffered_members, buffered_extractions)
+        set = "[#{'^' if context.negative_set?(1)}#{buffered_members.join}]"
+        if buffered_extractions.empty?
+          set
+        else
+          "(?:#{set}|#{buffered_extractions.join('|')})"
+        end
       end
     end
   end

data/lib/js_regex/converter/type_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex
@@ -6,8 +8,8 @@ class JsRegex
     # Template class implementation.
     #
     class TypeConverter < JsRegex::Converter::Base
-      HEX_EXPANSION    = '[A-Fa-f0-9]'.freeze
-      NONHEX_EXPANSION = '[^A-Fa-f0-9]'.freeze
+      HEX_EXPANSION    = '[A-Fa-f0-9]'
+      NONHEX_EXPANSION = '[^A-Fa-f0-9]'
       private
@@ -15,7 +17,7 @@ class JsRegex
         case subtype
         when :hex then HEX_EXPANSION
         when :nonhex then NONHEX_EXPANSION
-        when :any, :digit, :nondigit, :word, :nonword, :space, :nonspace
+        when :digit, :nondigit, :word, :nonword, :space, :nonspace
           pass_through
         else
           warn_of_unsupported_feature

data/lib/js_regex/converter/unsupported_token_converter.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 require_relative 'base'
 class JsRegex

data/lib/js_regex/property_map.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+# encoding: utf-8
 # frozen_string_literal: true
 #
 # This hash maps named properties that are available in Ruby's ::Regexp to
@@ -11,8 +12,6 @@
 # Note that the names emitted by Scanner are slightly inconsistent at times,
 # e.g. 'grapheme_extend' vs. 'other_grapheme_extended'.
 #
-# Surrogate blocks are left out because Ruby sees them as invalid unicode range.
-#
 # rubocop:disable ClassLength, LineLength
 #
 class JsRegex
@@ -99,6 +98,8 @@ class JsRegex
     block_inhangul_syllables: '[\uAC00-\uD7AF]',
     block_inhanunoo: '[\u1720-\u173F]',
     block_inhebrew: '[\u0590-\u05FF]',
+    block_inhigh_private_use_surrogates: '[\uDB80–\uDBFF]',
+    block_inhigh_surrogates: '[\uD800–\uDBFF]',
     block_inhiragana: '[\u3040-\u309F]',
     block_inideographic_description_characters: '[\u2FF0-\u2FFF]',
     block_inipa_extensions: '[\u0250-\u02AF]',
@@ -116,6 +117,7 @@ class JsRegex
     block_inlatin_extended_b: '[\u0180-\u024F]',
     block_inletterlike_symbols: '[\u2100-\u214F]',
     block_inlimbu: '[\u1900-\u194F]',
+    block_inlow_surrogates: '[\uDC00–\uDFFF]',
     block_inmalayalam: '[\u0D00-\u0D7F]',
     block_inmathematical_operators: '[\u2200-\u22FF]',
     block_inmiscellaneous_mathematical_symbols_a: '[\u27C0-\u27EF]',
@@ -310,6 +312,7 @@ class JsRegex
     separator_space: '[\x20\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]',
     soft_dotted: '[\u0069-\u006A\u012F\u0249\u0268\u029D\u02B2\u03F3\u0456\u0458\u1D62\u1D96\u1DA4\u1DA8\u1E2D\u1ECB\u2071\u2148-\u2149\u2C7C]',
     space: '[\s]',
+    surrogate: '[\uD800-\uDFFF]',
     symbol: '[\x24\x2B\x3C-\x3E\x5E\x60\x7C\x7E\u00A2-\u00A6\u00A8\u00A9\u00AC\u00AE-\u00B1\u00B4\u00B8\u00D7\u00F7\u02C2-\u02C5\u02D2-\u02DF\u02E5-\u02EB\u02ED\u02EF-\u02FF\u0375\u0384\u0385\u03F6\u0482\u058D-\u058F\u0606-\u0608\u060B\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09F2\u09F3\u09FA\u09FB\u0AF1\u0B70\u0BF3-\u0BFA\u0C7F\u0D79\u0E3F\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u17DB\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u1FBD\u1FBF-\u1FC1\u1FCD-\u1FCF\u1FDD-\u1FDF\u1FED-\u1FEF\u1FFD\u1FFE\u2044\u2052\u207A-\u207C\u208A-\u208C\u20A0-\u20BD\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116-\u2118\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u2140-\u2144\u214A-\u214D\u214F\u2190-\u2307\u230C-\u2328\u232B-\u23FA\u2400-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u2767\u2794-\u27C4\u27C7-\u27E5\u27F0-\u2982\u2999-\u29D7\u29DC-\u29FB\u29FE-\u2B73\u2B76-\u2B95\u2B98-\u2BB9\u2BBD-\u2BC8\u2BCA-\u2BD1\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u309B\u309C\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA700-\uA716\uA720\uA721\uA789\uA78A\uA828-\uA82B\uA836-\uA839\uAA77-\uAA79\uAB5B\uFB29\uFBB2-\uFBC1\uFDFC\uFDFD\uFE62\uFE64-\uFE66\uFE69\uFF04\uFF0B\uFF1C-\uFF1E\uFF3E\uFF40\uFF5C\uFF5E\uFFE0-\uFFE6\uFFE8-\uFFEE\uFFFC\uFFFD]',
     symbol_currency: '[\x24\u00A2-\u00A5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6]',
     symbol_math: '[\x2B\x3C-\x3E\x7C\x7E\u00AC\u00B1\u00D7\u00F7\u03F6\u0606-\u0608\u2044\u2052\u207A-\u207C\u208A-\u208C\u2118\u2140-\u2144\u214B\u2190-\u2194\u219A\u219B\u21A0\u21A3\u21A6\u21AE\u21CE\u21CF\u21D2\u21D4\u21F4-\u22FF\u2320\u2321\u237C\u239B-\u23B3\u23DC-\u23E1\u25B7\u25C1\u25F8-\u25FF\u266F\u27C0-\u27C4\u27C7-\u27E5\u27F0-\u27FF\u2900-\u2982\u2999-\u29D7\u29DC-\u29FB\u29FE-\u2AFF\u2B30-\u2B44\u2B47-\u2B4C\uFB29\uFE62\uFE64-\uFE66\uFF0B\uFF1C-\uFF1E\uFF5C\uFF5E\uFFE2\uFFE9-\uFFEC]',

data/lib/js_regex/version.rb ADDED Viewed

@@ -0,0 +1,5 @@
+# frozen_string_literal: true
+class JsRegex
+  VERSION = '1.1.0'
+end

data/lib/js_regex.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+# frozen_string_literal: true
 #
 # JsRegex converts ::Regexp instances to JavaScript.
 #

metadata CHANGED Viewed

@@ -1,59 +1,65 @@
 --- !ruby/object:Gem::Specification
 name: js_regex
 version: !ruby/object:Gem::Version
-  version: 1.0.19
+  version: 1.1.0
 platform: ruby
 authors:
 - Janosch Müller
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-11-15 00:00:00.000000000 Z
+date: 2016-11-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: regexp_parser
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - '='
+    - - ">="
       - !ruby/object:Gem::Version
         version: 0.3.6
+    - - "<="
+      - !ruby/object:Gem::Version
+        version: 0.4.1
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - '='
+    - - ">="
       - !ruby/object:Gem::Version
         version: 0.3.6
+    - - "<="
+      - !ruby/object:Gem::Version
+        version: 0.4.1
 - !ruby/object:Gem::Dependency
-  name: mutant-rspec
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.8'
+        version: '11.3'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.8'
+        version: '11.3'
 - !ruby/object:Gem::Dependency
-  name: rake
+  name: rspec-core
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '11.3'
+        version: '3.5'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '11.3'
+        version: '3.5'
 - !ruby/object:Gem::Dependency
-  name: rspec-core
+  name: rspec-expectations
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -67,7 +73,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '3.5'
 - !ruby/object:Gem::Dependency
-  name: rspec-expectations
+  name: rspec-mocks
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
@@ -108,6 +114,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.6'
+- !ruby/object:Gem::Dependency
+  name: mutant-rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
   care of various incompatibilities and returning warnings for unsolvable differences.
 email:
@@ -136,6 +156,7 @@ files:
 - lib/js_regex/converter/type_converter.rb
 - lib/js_regex/converter/unsupported_token_converter.rb
 - lib/js_regex/property_map.rb
+- lib/js_regex/version.rb
 homepage: https://github.com/janosch-x/js_regex
 licenses:
 - MIT
@@ -148,7 +169,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.9.1
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
@@ -156,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.5.1
+rubygems_version: 2.5.2
 signing_key:
 specification_version: 4
 summary: Converts Ruby regexes to JavaScript regexes.