RubyGems - js_regex - Versions diffs - 1.2.3 → 2.0.0 - Mend

js_regex 1.2.3 → 2.0.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

checksums.yaml +4 -4
data/lib/js_regex.rb +2 -1
data/lib/js_regex/conversion.rb +20 -72
data/lib/js_regex/converter.rb +29 -0
data/lib/js_regex/converter/anchor_converter.rb +9 -2
data/lib/js_regex/converter/assertion_converter.rb +2 -3
data/lib/js_regex/converter/backreference_converter.rb +19 -9
data/lib/js_regex/converter/base.rb +45 -19
data/lib/js_regex/converter/conditional_converter.rb +5 -8
data/lib/js_regex/converter/context.rb +54 -77
data/lib/js_regex/converter/escape_converter.rb +34 -1
data/lib/js_regex/converter/freespace_converter.rb +1 -1
data/lib/js_regex/converter/group_converter.rb +27 -41
data/lib/js_regex/converter/meta_converter.rb +8 -4
data/lib/js_regex/converter/nonproperty_converter.rb +1 -6
data/lib/js_regex/converter/root_converter.rb +18 -0
data/lib/js_regex/converter/set_converter.rb +56 -61
data/lib/js_regex/converter/type_converter.rb +4 -2
data/lib/js_regex/property_map.rb +1 -1
data/lib/js_regex/version.rb +1 -1
metadata +35 -6
data/lib/js_regex/converter/quantifier_converter.rb +0 -31

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3d910abcbfe22ab6a470b8784025da0d81a7d823
-  data.tar.gz: 8a30eddfc5efe71a598afe7064f3a0ee944e017e
+  metadata.gz: 2abae347b737c7635396c45a3a1489c72d6957e5
+  data.tar.gz: 5b674eaf6902686ab94648000308090fa154783c
 SHA512:
-  metadata.gz: 031334a1f4d48e5f432cb6bfada63eb2821b61d8d2c8e052548c98db0f5a8fd83f606305c00976c51c6e6f1caaa6f0def2c0f405914a4daa694c2606b92a5c25
-  data.tar.gz: eb7be820e5112f488fa69326e4870a0f9ac8d4538138c41d83e201551a8aa2a2cfe36ca5cba18e944a5a89a0fd25fc8783306062bea7017f1fddbb559d1fb299
+  metadata.gz: 3406867e15cb70cbc0a9f137f2449fce1af94fb448bcdbee3cfa6e439f64c92d4df9a5407fee251ea127a59faf8f6cbf81d55b38279d79e1a6de24e5b1c6673a
+  data.tar.gz: d0e63535e25eb8adb3e8d95859a884ce0eaec81ba25f1da8970213f4572fd558b9cbb7e3b25dc59d261213a9a0d7f9e0e7bb29b4f14cffba5fa9df49b0ec4780

data/lib/js_regex.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
-#
 # JsRegex converts ::Regexp instances to JavaScript.
 #
 # Usage:
@@ -10,6 +10,7 @@
 #
 class JsRegex
   require_relative File.join('js_regex', 'conversion')
+  require_relative File.join('js_regex', 'version')
   require 'json'
   attr_reader :source, :options, :warnings

data/lib/js_regex/conversion.rb CHANGED

@@ -2,88 +2,36 @@
 class JsRegex
   #
-  # This class acts as a facade, creating specific Converters and
-  # passing Regexp::Scanner tokens to them, reusing Converters as needed.
+  # This class acts as a facade, passing a regex to the converters.
   #
   # ::of returns a source String, options String, and warnings Array.
   #
   class Conversion
     require 'regexp_parser'
-    Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }
+    require_relative 'converter'
-    attr_reader :ruby_regex, :context, :converters, :source, :options, :warnings
-    def initialize(ruby_regex)
-      self.ruby_regex = ruby_regex
-      self.context    = Converter::Context.new
-      self.converters = {}
-      self.source     = ''.dup
-      self.options    = ''.dup
-      self.warnings   = []
-      convert_source
-      convert_options
-      perform_sanity_check
-    end
-    def self.of(ruby_regex)
-      conversion = new(ruby_regex)
-      [conversion.source, conversion.options, conversion.warnings]
-    end
-    private
-    attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
-    CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
-      anchor:      Converter::AnchorConverter,
-      assertion:   Converter::AssertionConverter,
-      backref:     Converter::BackreferenceConverter,
-      conditional: Converter::ConditionalConverter,
-      escape:      Converter::EscapeConverter,
-      free_space:  Converter::FreespaceConverter,
-      group:       Converter::GroupConverter,
-      literal:     Converter::LiteralConverter,
-      meta:        Converter::MetaConverter,
-      nonproperty: Converter::NonpropertyConverter,
-      property:    Converter::PropertyConverter,
-      quantifier:  Converter::QuantifierConverter,
-      set:         Converter::SetConverter,
-      subset:      Converter::SetConverter,
-      type:        Converter::TypeConverter
-    ).freeze
-    def convert_source
-      Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
-        # There might be a lot of tokens, so don't wrap their data in objects.
-        # Even just wrapping them in simple structs or attr_reader objects
-        # can lead to 60%+ longer processing times for large regexes.
-        converter_for_token_class(token_class)
-          .convert(token_class, subtype, data, s, e)
+    class << self
+      def of(ruby_regex)
+        source, warnings = convert_source(ruby_regex)
+        options          = convert_options(ruby_regex)
+        [source, options, warnings]
       end
-    end
-    def converter_for_token_class(token_class)
-      converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
-    end
-    def convert_options
-      options << 'g' # all Ruby regexes are what is called "global" in JS
-      options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
-    end
+      private
-    SURROGATE_CODEPOINT_PATTERN = /\\uD[89A-F]\h\h/i
+      def convert_source(ruby_regex)
+        context         = Converter::Context.new(ruby_regex)
+        expression_tree = Regexp::Parser.parse(ruby_regex)
+        [
+          Converter::RootConverter.new.convert(expression_tree, context),
+          context.warnings
+        ]
+      end
-    def perform_sanity_check
-      # Ruby regex capabilities are a superset of JS regex capabilities in
-      # the source part. So if this raises an Error, a Converter messed up.
-      # Ignore that Ruby won't accept surrogate pairs, though.
-      Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
-    rescue ArgumentError, RegexpError, SyntaxError => e
-      self.source = ''
-      warnings << e.message
+      def convert_options(ruby_regex)
+        ignore_case = (ruby_regex.options & Regexp::IGNORECASE).nonzero?
+        ignore_case ? 'gi' : 'g'
+      end
     end
   end
 end

data/lib/js_regex/converter.rb ADDED

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+class JsRegex
+  module Converter
+    Dir[File.join(File.dirname(__FILE__), 'converter', '*.rb')].each do |file|
+      require file
+    end
+    MAP = Hash.new(UnsupportedTokenConverter).merge(
+      anchor:      AnchorConverter,
+      assertion:   AssertionConverter,
+      backref:     BackreferenceConverter,
+      conditional: ConditionalConverter,
+      escape:      EscapeConverter,
+      free_space:  FreespaceConverter,
+      group:       GroupConverter,
+      literal:     LiteralConverter,
+      meta:        MetaConverter,
+      nonproperty: NonpropertyConverter,
+      property:    PropertyConverter,
+      set:         SetConverter,
+      type:        TypeConverter
+    ).freeze
+    def self.for(expression)
+      MAP[expression.type].new
+    end
+  end
+end

data/lib/js_regex/converter/anchor_converter.rb CHANGED

@@ -15,12 +15,19 @@ class JsRegex
         when :bol, :bos then '^'
         when :eol, :eos then '$'
         when :eos_ob_eol then '(?=\n?$)'
-        when :word_boundary then '\b'
-        when :nonword_boundary then '\B'
+        when :word_boundary then pass_boundary_with_warning('\b')
+        when :nonword_boundary then pass_boundary_with_warning('\B')
         else
           warn_of_unsupported_feature
         end
       end
+      def pass_boundary_with_warning(boundary)
+        warn("The boundary '#{boundary}' at index #{expression.ts} "\
+             'is not unicode-aware in JavaScript, '\
+             'so it might act differently than in Ruby.')
+        boundary
+      end
     end
   end
 end

data/lib/js_regex/converter/assertion_converter.rb CHANGED

@@ -16,12 +16,11 @@ class JsRegex
       def convert_data
         case subtype
         when :lookahead, :nlookahead
-          open_group(capturing: false)
+          build_group(capturing: false)
         when :nlookbehind
-          context.start_negative_lookbehind
           warn_of_unsupported_feature('negative lookbehind assertion')
         else # :lookbehind, ...
-          open_unsupported_group
+          build_unsupported_group
         end
       end
     end

data/lib/js_regex/converter/backreference_converter.rb CHANGED

@@ -12,20 +12,30 @@ class JsRegex
       def convert_data
         case subtype
-        when :number
-          convert_number_backref
+        when :number, :number_ref
+          convert_number_ref
+        when :number_rel_ref
+          convert_number_rel_ref
+        when :name_ref
+          convert_name_ref
         else
           warn_of_unsupported_feature
         end
       end
-      def convert_number_backref
-        if context.group_count_changed
-          warn_of_unsupported_feature('number backreference following a '\
-            'feature that changes the group count (such as an atomic group)')
-        else
-          pass_through
-        end
+      def convert_number_ref
+        # after regexp_parser update, replace data[/\d+/] with expression.number
+        "\\#{context.new_capturing_group_position(Integer(data[/\d+/]))}"
+      end
+      def convert_number_rel_ref
+        absolute_position = Integer(expression.number) +
+                            context.original_capturing_group_count + 1
+        "\\#{context.new_capturing_group_position(absolute_position)}"
+      end
+      def convert_name_ref
+        "\\#{context.named_group_positions.fetch(expression.name)}"
       end
     end
   end

data/lib/js_regex/converter/base.rb CHANGED

@@ -6,35 +6,61 @@ class JsRegex
     # Template class. Implement #convert_data in subclasses.
     #
     class Base
-      attr_reader :target, :context
+      def convert(expression, context)
+        self.context    = context
+        self.expression = expression
-      def initialize(target, context)
-        @target = target
-        @context = context
+        source = convert_data
+        apply_quantifier(source)
       end
-      def convert(token_class, subtype, data, start_index, end_index)
-        self.token_class = token_class
-        self.subtype = subtype
-        self.data = data
-        self.start_index = start_index
-        self.end_index = end_index
+      private
+      attr_accessor :context, :expression
-        result = convert_data
-        target.source << (context.valid? ? result : '')
+      def subtype
+        expression.token
       end
-      private
+      def data
+        expression.text
+      end
+      alias pass_through data
-      attr_accessor :token_class, :subtype, :data, :start_index, :end_index
+      def apply_quantifier(source)
+        return source if source.empty? || !(quantifier = expression.quantifier)
-      alias pass_through data
+        if quantifier.mode.equal?(:possessive)
+          context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
+        else
+          source + quantifier
+        end
+      end
+      def convert_subexpressions
+        convert_expressions(subexpressions)
+      end
+      def convert_expressions(expressions)
+        expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
+      end
+      def subexpressions
+        expression.expressions
+      end
       def warn_of_unsupported_feature(description = nil)
-        description ||= "#{subtype} #{token_class}".tr('_', ' ')
-        full_description = "#{description} '#{data}'"
-        target.warnings << "Dropped unsupported #{full_description} "\
-                           "at index #{start_index}...#{end_index}"
+        description ||= "#{subtype} #{expression.type}".tr('_', ' ')
+        full_desc = "#{description} '#{expression}'"
+        warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
+        ''
+      end
+      def warn(text)
+        context.warnings << text
+      end
+      def drop_without_warning
         ''
       end
     end

data/lib/js_regex/converter/conditional_converter.rb CHANGED

@@ -11,15 +11,12 @@ class JsRegex
       private
       def convert_data
-        case subtype
-        when :open
-          warn_of_unsupported_feature('conditional')
-          '('
-        when :separator, :close
-          pass_through
-        else
-          '' # one warning is enough, don't warn about other parts
+        warn_of_unsupported_feature('conditional')
+        branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
+          converted_branch = convert_expressions(branch)
+          arr << converted_branch unless converted_branch.eql?('')
         end
+        "(?:#{branches.join('|')})"
       end
     end
   end

data/lib/js_regex/converter/context.rb CHANGED

@@ -10,120 +10,97 @@ class JsRegex
     class Context
       attr_reader :buffered_set_extractions,
                   :buffered_set_members,
-                  :captured_group_count,
-                  :group_count_changed,
-                  :group_level_for_backreference,
-                  :negative_lookbehind
+                  :in_atomic_group,
+                  :named_group_positions,
+                  :negative_base_set,
+                  :root_options,
+                  :warnings
-      def initialize
-        self.buffered_set_members = []
-        self.buffered_set_extractions = []
-        self.captured_group_count = 0
-        self.group_level = 0
-        self.negative_set_levels = []
-        self.set_level = 0
-      end
-      def valid?
-        !negative_lookbehind
-      end
-      def stacked_quantifier?(quantifier_start_index, quantifier_end_index)
-        is_stacked = last_quantifier_end_index.equal?(quantifier_start_index)
-        self.last_quantifier_end_index = quantifier_end_index
-        is_stacked
-      end
-      # set context
-      def open_set
-        self.set_level = set_level + 1
-        if set_level == 1
-          buffered_set_members.clear
-          buffered_set_extractions.clear
-        end
-        negative_set_levels.delete(set_level)
-      end
+      def initialize(ruby_regex)
+        self.added_capturing_groups_after_group = Hash.new(0)
+        self.capturing_group_count = 0
+        self.named_group_positions = {}
+        self.warnings = []
-      def negate_set
-        self.negative_set_levels = negative_set_levels | [set_level]
+        self.root_options = {}
+        root_options[:m] = !(ruby_regex.options & Regexp::MULTILINE).equal?(0)
       end
-      def close_set
-        self.set_level = set_level - 1
-      end
+      # option context
-      def set?
-        set_level > 0
+      def multiline?
+        root_options.fetch(:m)
       end
-      def negative_set?(level = set_level)
-        negative_set_levels.include?(level)
-      end
+      # set context
-      def nested_negation?
-        nested_set? && negative_set?
+      def negate_base_set
+        self.negative_base_set = true
       end
-      def nested_set?
-        set_level > 1
+      def reset_set_context
+        self.buffered_set_extractions = []
+        self.buffered_set_members = []
+        self.negative_base_set = false
       end
       # group context
-      def open_group
-        self.group_level = group_level + 1
-      end
       def capture_group
-        self.captured_group_count = captured_group_count + 1
+        self.capturing_group_count = capturing_group_count + 1
       end
       def start_atomic_group
-        self.group_level_for_backreference = group_level
+        self.in_atomic_group = true
       end
-      def start_negative_lookbehind
-        self.negative_lookbehind = true
+      def end_atomic_group
+        self.in_atomic_group = false
       end
-      def close_group
-        self.group_level = group_level - 1
+      def wrap_in_backrefed_lookahead(content)
+        new_backref_num = capturing_group_count + 1
+        # an empty passive group (?:) is appended as literal digits may follow
+        result = "(?=(#{content}))\\#{new_backref_num}(?:)"
+        added_capturing_groups_after_group[original_capturing_group_count] += 1
+        capture_group
+        result
       end
-      def close_atomic_group
-        close_group
-        self.group_level_for_backreference = nil
-        self.group_count_changed = true
+      # takes and returns 1-indexed group positions.
+      # new is different from old if capturing groups were added in between.
+      def new_capturing_group_position(old_position)
+        increment = 0
+        added_capturing_groups_after_group.each do |after_n_groups, count|
+          increment += count if after_n_groups < old_position
+        end
+        old_position + increment
       end
-      def close_negative_lookbehind
-        close_group
-        self.negative_lookbehind = false
+      def original_capturing_group_count
+        capturing_group_count - total_added_capturing_groups
       end
-      def atomic_group?
-        group_level_for_backreference
+      def total_added_capturing_groups
+        added_capturing_groups_after_group.values.inject(0, &:+)
       end
-      def base_level_of_atomic_group?
-        group_level_for_backreference &&
-          group_level.equal?(group_level_for_backreference + 1)
+      def store_named_group_position(name)
+        named_group_positions[name] = capturing_group_count + 1
       end
       private
-      attr_accessor :group_level,
-                    :last_quantifier_end_index,
-                    :negative_set_levels,
-                    :set_level
+      attr_accessor :added_capturing_groups_after_group,
+                    :capturing_group_count
       attr_writer :buffered_set_extractions,
                   :buffered_set_members,
-                  :captured_group_count,
-                  :group_count_changed,
-                  :group_level_for_backreference,
-                  :negative_lookbehind
+                  :in_atomic_group,
+                  :named_group_positions,
+                  :negative_base_set,
+                  :root_options,
+                  :warnings
     end
   end
 end

data/lib/js_regex/converter/escape_converter.rb CHANGED

@@ -39,12 +39,16 @@ class JsRegex
         case subtype
         when :codepoint_list
           convert_codepoint_list
+        when :control
+          convert_control_sequence
         when :literal
           LiteralConverter.convert_data(data)
+        when :meta_sequence
+          convert_meta_sequence
         when *ESCAPES_SHARED_BY_RUBY_AND_JS
           pass_through
         else
-          # Bell, Escape, HexWide, Control, Meta, MetaControl, ...
+          # Bell, Escape, HexWide, ...
           warn_of_unsupported_feature
         end
       end
@@ -56,6 +60,35 @@ class JsRegex
         end
         elements.join
       end
+      def convert_control_sequence
+        convert_meta_control_sequence ||
+          unicode_escape_for(control_sequence_to_s(data))
+      end
+      def convert_meta_sequence
+        convert_meta_control_sequence ||
+          unicode_escape_for(meta_char_to_char_code(data[-1]))
+      end
+      def convert_meta_control_sequence
+        return unless expression.class.to_s.include?('MetaControl')
+        unicode_escape_for(meta_char_to_char_code(control_sequence_to_s(data)))
+      end
+      def unicode_escape_for(char)
+        "\\u#{char.ord.to_s(16).upcase.rjust(4, '0')}"
+      end
+      def control_sequence_to_s(control_sequence)
+        five_lsb = control_sequence.unpack('B*').first[-5..-1]
+        ["000#{five_lsb}"].pack('B*')
+      end
+      def meta_char_to_char_code(meta_char)
+        byte_value = meta_char.ord
+        byte_value < 128 ? byte_value + 128 : byte_value
+      end
     end
   end
 end

data/lib/js_regex/converter/freespace_converter.rb CHANGED

@@ -11,7 +11,7 @@ class JsRegex
       private
       def convert_data
-        '' # drop data without warning
+        drop_without_warning
       end
     end
   end

data/lib/js_regex/converter/group_converter.rb CHANGED

@@ -12,66 +12,52 @@ class JsRegex
       def convert_data
         case subtype
-        when :atomic then open_atomic_group
-        when :capture then open_group
-        when :close then close_group
-        when :comment then '' # drop whole group without warning
-        when :named_ab, :named_sq then open_named_group
-        when :options then open_options_group
-        when :passive then open_passive_group
-        else open_unsupported_group
+        when :atomic then emulate_atomic_group
+        when :capture then build_group
+        when :comment then drop_without_warning
+        when :named then build_named_group
+        when :options then build_options_group
+        when :passive then build_passive_group
+        when :absence then warn_of_unsupported_feature
+        else build_unsupported_group
         end
       end
-      def open_atomic_group
-        # Atomicity is emulated using backreferenced lookahead groups:
-        # http://instanceof.me/post/52245507631
-        # regex-emulate-atomic-grouping-with-lookahead
-        if context.atomic_group?
-          open_unsupported_group('nested atomic group')
+      def emulate_atomic_group
+        if context.in_atomic_group
+          build_unsupported_group('nested atomic group')
         else
           context.start_atomic_group
-          open_group(head: '(?=(')
+          result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
+          context.end_atomic_group
+          result
         end
       end
-      def open_named_group
-        # drop name without warning
-        open_group(head: '(')
+      def build_named_group
+        # remember position, then drop name part without warning
+        context.store_named_group_position(expression.name)
+        build_group(head: '(')
       end
-      def open_options_group
+      def build_options_group
         warn_of_unsupported_feature('group-specific options')
-        open_group(head: '(')
+        build_group(head: '(')
       end
-      def open_passive_group
-        open_group(head: '(?:', capturing: false)
+      def build_passive_group
+        build_group(head: '(?:', capturing: false)
       end
-      def open_unsupported_group(description = nil)
+      def build_unsupported_group(description = nil)
         warn_of_unsupported_feature(description)
-        open_passive_group
+        build_passive_group
       end
-      def open_group(opts = {})
-        context.open_group
+      def build_group(opts = {})
         context.capture_group unless opts[:capturing].equal?(false)
-        opts[:head] || pass_through
-      end
-      def close_group
-        if context.negative_lookbehind
-          context.close_negative_lookbehind
-          ''
-        elsif context.base_level_of_atomic_group?
-          context.close_atomic_group
-          # an empty passive group (?:) is appended as literal digits may follow
-          "))\\#{context.captured_group_count}(?:)"
-        else
-          context.close_group
-          ')'
-        end
+        head = opts[:head] || pass_through
+        "#{head}#{convert_subexpressions})"
       end
     end
   end

data/lib/js_regex/converter/meta_converter.rb CHANGED

@@ -13,16 +13,20 @@ class JsRegex
       def convert_data
         case subtype
         when :alternation
-          pass_through
+          convert_alternation
         when :dot
-          ruby_multiline_mode? ? '(?:.|\n)' : '.'
+          context.multiline? ? '(?:.|\n)' : '.'
         else
           warn_of_unsupported_feature
         end
       end
-      def ruby_multiline_mode?
-        (target.ruby_regex.options & Regexp::MULTILINE).nonzero?
+      def convert_alternation
+        branches = subexpressions.each_with_object([]) do |branch, arr|
+          converted_branch = convert_expressions(branch.expressions)
+          arr << converted_branch unless converted_branch.eql?('')
+        end
+        branches.join('|')
       end
     end
   end

data/lib/js_regex/converter/nonproperty_converter.rb CHANGED

@@ -14,12 +14,7 @@ class JsRegex
       private
       def convert_data
-        if context.set?
-          context.buffered_set_extractions << convert_property(true)
-          ''
-        else
-          convert_property(true)
-        end
+        convert_property(true)
       end
     end
   end

data/lib/js_regex/converter/root_converter.rb ADDED

@@ -0,0 +1,18 @@
+# frozen_string_literal: true
+require_relative 'base'
+class JsRegex
+  module Converter
+    #
+    # Template class implementation.
+    #
+    class RootConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        convert_subexpressions
+      end
+    end
+  end
+end

data/lib/js_regex/converter/set_converter.rb CHANGED

@@ -3,107 +3,102 @@
 require_relative 'base'
 require_relative 'literal_converter'
 require_relative 'property_converter'
-require_relative 'type_converter'
 class JsRegex
   module Converter
     #
     # Template class implementation.
     #
-    # This converter works a little differently from the others.
-    #
-    # It buffers anything that it finds within a set in the Context's
-    # #buffered_set_members and #buffered_set_extractions Arrays,
-    # returning an empty String for all passed tokens, and only when
-    # the set is closed does it compile and return the final String.
-    #
     class SetConverter < JsRegex::Converter::Base
       private
       def convert_data
-        case subtype
-        when :open then convert_open_subtype
-        when :negate then convert_negate_subtype
-        when :close then convert_close_subtype
-        when :member, :member_hex, :range, :range_hex, :escape
-          convert_member_subtype
-        when /\Aclass_/ then convert_class_subtype
-        when /\Atype_/ then convert_type_subtype
-        when :backspace then convert_backspace_subtype
-        when :intersection then warn_of_unsupported_feature('set intersection')
-        else try_replacing_potential_property_subtype
+        if expression.set_level.equal?(0) # reached end of set expression
+          context.reset_set_context
+          context.negate_base_set if negative_set?
+          process_members
+          finalize_set
+        elsif negative_set?
+          warn_of_unsupported_feature('nested negative set data')
+        else # positive subset
+          process_members
         end
       end
-      def convert_open_subtype
-        context.open_set
-        ''
+      def negative_set?
+        expression.negative?
       end
-      def convert_negate_subtype
-        if context.nested_set?
-          warn_of_unsupported_feature('nested negative set data')
-        end
-        context.negate_set
-        ''
+      def process_members
+        expression.each { |member| process_member(member) }
       end
-      def convert_close_subtype
-        context.close_set
-        context.set? ? '' : finalize_set
-      end
+      ASTRAL_PLANE_PATTERN = /[\u{10000}-\u{FFFFF}]/
+      PROPERTY_PATTERN     = /\A(?:\[:|\\([pP])\{)(\^?)([^:\}]+)/
+      def process_member(member)
+        return convert_subset(member) unless member.instance_of?(String)
-      def convert_member_subtype
-        utf8_data = data.force_encoding('UTF-8')
-        if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
+        utf8_data = member.dup.force_encoding('UTF-8')
+        case utf8_data
+        when ASTRAL_PLANE_PATTERN
           warn_of_unsupported_feature('astral plane set member')
+        when '\\h'
+          handle_hex_type
+        when '\\H'
+          handle_nonhex_type
+        when '&&'
+          warn_of_unsupported_feature('set intersection')
+        when PROPERTY_PATTERN
+          handle_property($1, $2, $3)
         else
           literal_conversion = LiteralConverter.convert_data(utf8_data)
           buffer_set_member(literal_conversion)
         end
       end
-      def convert_class_subtype
-        negated = subtype.to_s.start_with?('class_non')
-        name = subtype[(negated ? 9 : 6)..-1]
-        try_replacing_property(name, negated)
+      HEX_RANGES = 'A-Fa-f0-9'
+      NONHEX_SET = '[^A-Fa-f0-9]'
+      def handle_hex_type
+        buffer_set_member(HEX_RANGES)
       end
-      def try_replacing_potential_property_subtype
-        negated = data.start_with?('\\P')
-        try_replacing_property(subtype, negated)
+      def handle_nonhex_type
+        if context.negative_base_set
+          warn_of_unsupported_feature('nonhex type in negative set')
+        else
+          buffer_set_extraction(NONHEX_SET)
+        end
       end
-      def try_replacing_property(name, negated)
-        if (replacement = PropertyConverter.property_replacement(name, negated))
+      def handle_property(sign, caret, name)
+        if context.negative_base_set
+          return warn_of_unsupported_feature('property in negative set')
+        end
+        std = standardize_property_name(name)
+        negated = sign.eql?('P') ^ caret.eql?('^')
+        if (replacement = PropertyConverter.property_replacement(std, negated))
           buffer_set_extraction(replacement)
         else
           warn_of_unsupported_feature('property')
         end
       end
-      def convert_type_subtype
-        if subtype.equal?(:type_hex)
-          buffer_set_extraction(TypeConverter::HEX_EXPANSION)
-        elsif subtype.equal?(:type_nonhex)
-          buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
-        else
-          buffer_set_member(data)
-        end
+      def standardize_property_name(name)
+        Regexp::Parser.parse("\\p{#{name}}").expressions.first.token
       end
-      def convert_backspace_subtype
-        buffer_set_extraction('[\b]')
+      def buffer_set_member(data)
+        context.buffered_set_members << data
       end
-      def buffer_set_member(m)
-        context.buffered_set_members << m unless context.nested_negation?
-        ''
+      def buffer_set_extraction(data)
+        context.buffered_set_extractions << data
       end
-      def buffer_set_extraction(e)
-        context.buffered_set_extractions << e unless context.nested_negation?
-        ''
+      def convert_subset(subset)
+        SetConverter.new.convert(subset, context)
       end
       def finalize_set
@@ -125,7 +120,7 @@ class JsRegex
       end
       def finalize_nondepleted_set(buffered_members, buffered_extractions)
-        set = "[#{'^' if context.negative_set?(1)}#{buffered_members.join}]"
+        set = "[#{'^' if negative_set?}#{buffered_members.join}]"
         if buffered_extractions.empty?
           set
         else

data/lib/js_regex/converter/type_converter.rb CHANGED

@@ -8,8 +8,9 @@ class JsRegex
     # Template class implementation.
     #
     class TypeConverter < JsRegex::Converter::Base
-      HEX_EXPANSION    = '[A-Fa-f0-9]'
-      NONHEX_EXPANSION = '[^A-Fa-f0-9]'
+      HEX_EXPANSION       = '[A-Fa-f0-9]'
+      NONHEX_EXPANSION    = '[^A-Fa-f0-9]'
+      LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
       private
@@ -17,6 +18,7 @@ class JsRegex
         case subtype
         when :hex then HEX_EXPANSION
         when :nonhex then NONHEX_EXPANSION
+        when :linebreak then LINEBREAK_EXPANSION
         when :digit, :nondigit, :word, :nonword, :space, :nonspace
           pass_through
         else

data/lib/js_regex/property_map.rb CHANGED

@@ -1,6 +1,6 @@
 # encoding: utf-8
 # frozen_string_literal: true
-#
 # This hash maps named properties that are available in Ruby's ::Regexp to
 # standard sets that can be handled by JavaScript.
 #

data/lib/js_regex/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 class JsRegex
-  VERSION = '1.2.3'
+  VERSION = '2.0.0'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: js_regex
 version: !ruby/object:Gem::Version
-  version: 1.2.3
+  version: 2.0.0
 platform: ruby
 authors:
 - Janosch Müller
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-04-12 00:00:00.000000000 Z
+date: 2017-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: regexp_parser
@@ -16,7 +16,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.3.6
+        version: 0.4.6
     - - "<="
       - !ruby/object:Gem::Version
         version: 0.5.0
@@ -26,7 +26,7 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.3.6
+        version: 0.4.6
     - - "<="
       - !ruby/object:Gem::Version
         version: 0.5.0
@@ -100,6 +100,34 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '0.12'
+- !ruby/object:Gem::Dependency
+  name: codeclimate-test-reporter
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.0'
+- !ruby/object:Gem::Dependency
+  name: mutant-rspec
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.8'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '0.8'
 description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
   care of various incompatibilities and returning warnings for unsolvable differences.
 email:
@@ -110,6 +138,7 @@ extra_rdoc_files: []
 files:
 - lib/js_regex.rb
 - lib/js_regex/conversion.rb
+- lib/js_regex/converter.rb
 - lib/js_regex/converter/anchor_converter.rb
 - lib/js_regex/converter/assertion_converter.rb
 - lib/js_regex/converter/backreference_converter.rb
@@ -123,7 +152,7 @@ files:
 - lib/js_regex/converter/meta_converter.rb
 - lib/js_regex/converter/nonproperty_converter.rb
 - lib/js_regex/converter/property_converter.rb
-- lib/js_regex/converter/quantifier_converter.rb
+- lib/js_regex/converter/root_converter.rb
 - lib/js_regex/converter/set_converter.rb
 - lib/js_regex/converter/type_converter.rb
 - lib/js_regex/converter/unsupported_token_converter.rb
@@ -149,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.6.11
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: Converts Ruby regexes to JavaScript regexes.

data/lib/js_regex/converter/quantifier_converter.rb DELETED

@@ -1,31 +0,0 @@
-# frozen_string_literal: true
-require_relative 'base'
-class JsRegex
-  module Converter
-    #
-    # Template class implementation.
-    #
-    class QuantifierConverter < JsRegex::Converter::Base
-      private
-      def convert_data
-        if context.stacked_quantifier?(start_index, end_index)
-          warn_of_unsupported_feature('adjacent quantifiers')
-        else
-          convert_quantifier
-        end
-      end
-      def convert_quantifier
-        if data.length > 1 && data.end_with?('+')
-          warn_of_unsupported_feature('declaration of quantifier as possessive')
-          data[0..-2]
-        else
-          pass_through
-        end
-      end
-    end
-  end
-end