RubyGems - js_regex - Versions diffs - 1.0.6 - Mend

js_regex 1.0.6

Files changed (22) hide show

checksums.yaml +7 -0
data/lib/js_regex/conversion.rb +82 -0
data/lib/js_regex/converter/anchor_converter.rb +24 -0
data/lib/js_regex/converter/assertion_converter.rb +27 -0
data/lib/js_regex/converter/base.rb +44 -0
data/lib/js_regex/converter/conditional_converter.rb +24 -0
data/lib/js_regex/converter/context.rb +63 -0
data/lib/js_regex/converter/escape_converter.rb +27 -0
data/lib/js_regex/converter/freespace_converter.rb +16 -0
data/lib/js_regex/converter/group_converter.rb +81 -0
data/lib/js_regex/converter/literal_converter.rb +29 -0
data/lib/js_regex/converter/meta_converter.rb +28 -0
data/lib/js_regex/converter/nonproperty_converter.rb +18 -0
data/lib/js_regex/converter/property_converter.rb +40 -0
data/lib/js_regex/converter/quantifier_converter.rb +37 -0
data/lib/js_regex/converter/set_converter.rb +137 -0
data/lib/js_regex/converter/subset_converter.rb +10 -0
data/lib/js_regex/converter/type_converter.rb +26 -0
data/lib/js_regex/converter/unsupported_token_converter.rb +16 -0
data/lib/js_regex/property_map.rb +330 -0
data/lib/js_regex.rb +26 -0
metadata +107 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: d7423c167f82ba8e240c7087149430e95f08c98c
+  data.tar.gz: ebbf085f8aede4f731ba598c04e6d2d87f1953fb
+SHA512:
+  metadata.gz: 651ccc96ac12d997fe49361270db2beb076954d8fe43299433370327175ff3a979e4378d2ec96b12401ec14438cc00a83ca47e607b6faac5fd43cbaea2f8d882
+  data.tar.gz: 1f57cab92495b0d6fb23017acde909daf8ef2a682dab21558246a3eb440859aa91d377c5be71699f08c1da50c5cea57e7acfba036f7049bcdd4e0f8e0deb2550

data/lib/js_regex/conversion.rb ADDED Viewed

@@ -0,0 +1,82 @@
+class JsRegex
+  #
+  # This class acts as a facade, creating specific Converters and
+  # passing Regexp::Scanner tokens to them, reusing Converters as needed.
+  #
+  # ::of returns a source String, options String, and warnings Array.
+  #
+  class Conversion
+    require 'regexp_parser'
+    Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }
+    attr_reader :ruby_regex, :source, :options, :warnings
+    def initialize(ruby_regex)
+      @ruby_regex = ruby_regex
+      @source = ''
+      @options = ''
+      @warnings = []
+      convert_source(ruby_regex)
+      convert_options(ruby_regex)
+      perform_sanity_check
+    end
+    def self.of(ruby_regex)
+      conversion = new(ruby_regex)
+      [conversion.source, conversion.options, conversion.warnings]
+    end
+    private
+    def convert_source(ruby_regex)
+      Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
+        # There might be a lot of tokens, so don't wrap their data in objects.
+        # Even just wrapping them in simple structs or attr_reader objects
+        # can lead to 60%+ longer processing times for large regexes.
+        convert_token(token_class, subtype, data, s, e)
+      end
+      converters.clear
+    end
+    def convert_token(token_class, subtype, data, s, e)
+      converter = converter_for_token_class(token_class)
+      converter.convert(token_class, subtype, data, s, e)
+    end
+    def converter_for_token_class(token_class)
+      converters[token_class] ||= begin
+        converter_name = converter_name_for_token_class(token_class)
+        converter_class = JsRegex::Converter.const_get(converter_name)
+        converter_class.new(self, context)
+      end
+    end
+    def converter_name_for_token_class(token_class)
+      name = "#{token_class.to_s.delete('_').capitalize}Converter"
+      Converter.const_defined?(name) ? name : 'UnsupportedTokenConverter'
+    end
+    def converters
+      @converters ||= {}
+    end
+    def context
+      @context ||= JsRegex::Converter::Context.new
+    end
+    def convert_options(ruby_regex)
+      @options = 'g' # all Ruby regexes are what is called "global" in JS
+      @options << 'i' if ruby_regex.options & Regexp::IGNORECASE > 0
+    end
+    def perform_sanity_check
+      # Ruby regex capabilities are a superset of JS regex capabilities in
+      # the source part. So if this raises an Error, a Converter messed up:
+      Regexp.new(source, options)
+    rescue ArgumentError, RegexpError, SyntaxError => e
+      @source = ''
+      warnings << e.message
+    end
+  end
+end

data/lib/js_regex/converter/anchor_converter.rb ADDED Viewed

@@ -0,0 +1,24 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class AnchorConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        case subtype
+        when :bol, :bos then '^'
+        when :eol, :eos then '$'
+        when :eos_ob_eol then '(?=\n?$)'
+        when :word_boundary then '\b'
+        when :nonword_boundary then '\B'
+        else
+          warn_of_unsupported_feature
+        end
+      end
+    end
+  end
+end

data/lib/js_regex/converter/assertion_converter.rb ADDED Viewed

@@ -0,0 +1,27 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'group_converter'
+    #
+    # Template class implementation.
+    #
+    # Note the inheritance from GroupConverter.
+    #
+    class AssertionConverter < JsRegex::Converter::GroupConverter
+      private
+      def convert_data
+        case subtype
+        when :lookahead, :nlookahead
+          open_assertion
+        when :nlookbehind
+          context.negative_lookbehind = true
+          warn_of_unsupported_feature('negative lookbehind assertion')
+        else # :lookbehind, ...
+          warn_of_unsupported_feature
+          open_group('(?:')
+        end
+      end
+    end
+  end
+end

data/lib/js_regex/converter/base.rb ADDED Viewed

@@ -0,0 +1,44 @@
+class JsRegex
+  module Converter
+    #
+    # Template class. Implement #convert_data in subclasses.
+    #
+    class Base
+      attr_reader :target, :context
+      def initialize(target, context)
+        @target = target
+        @context = context
+      end
+      def convert(token_class, subtype, data, start_index, end_index)
+        self.token_class = token_class
+        self.subtype = subtype
+        self.data = data
+        self.start_index = start_index
+        self.end_index = end_index
+        target.source << (context.valid? ? convert_data : '')
+      end
+      private
+      attr_accessor :token_class, :subtype, :data, :start_index, :end_index
+      def convert_data
+        fail NotImplementedError
+      end
+      def pass_through
+        data
+      end
+      def warn_of_unsupported_feature(description = nil)
+        description ||= "#{subtype} #{token_class} '#{data}'".tr('_', ' ')
+        target.warnings << "Dropped unsupported #{description} "\
+                           "at index #{start_index}..#{end_index}"
+        ''
+      end
+    end
+  end
+end

data/lib/js_regex/converter/conditional_converter.rb ADDED Viewed

@@ -0,0 +1,24 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class ConditionalConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        case subtype
+        when :open
+          warn_of_unsupported_feature("conditional '(?'")
+          '('
+        when :separator, :close
+          pass_through
+        else
+          '' # one warning is enough, don't warn about other parts
+        end
+      end
+    end
+  end
+end

data/lib/js_regex/converter/context.rb ADDED Viewed

@@ -0,0 +1,63 @@
+class JsRegex
+  module Converter
+    #
+    # Passed among Converters to globalize basic status data.
+    #
+    # The Converters themselves are stateless.
+    #
+    class Context
+      attr_accessor :buffered_set_members,
+                    :buffered_set_extractions,
+                    :group_level,
+                    :group_level_for_backreference,
+                    :group_number_for_backreference,
+                    :negative_lookbehind,
+                    :negative_set_levels,
+                    :opened_groups,
+                    :previous_quantifier_subtype,
+                    :previous_quantifier_end,
+                    :set_level
+      def initialize
+        self.buffered_set_members = []
+        self.buffered_set_extractions = []
+        self.group_level = 0
+        self.negative_lookbehind = false
+        self.negative_set_levels = []
+        self.opened_groups = 0
+        self.set_level = 0
+      end
+      def valid?
+        !negative_lookbehind
+      end
+      # set context
+      def open_set
+        self.set_level += 1
+        if set_level == 1
+          buffered_set_members.clear
+          buffered_set_extractions.clear
+        end
+        self.negative_set_levels -= [set_level]
+      end
+      def negate_set
+        self.negative_set_levels |= [set_level]
+      end
+      def negative_set?(level = set_level)
+        negative_set_levels.include?(level)
+      end
+      def nested_negation?
+        set_level > 1 && negative_set?
+      end
+      def close_set
+        self.set_level -= 1
+      end
+    end
+  end
+end

data/lib/js_regex/converter/escape_converter.rb ADDED Viewed

@@ -0,0 +1,27 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    require_relative 'literal_converter'
+    #
+    # Template class implementation.
+    #
+    class EscapeConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        case subtype
+        when :backslash, :dot, :form_feed, :hex, :interval_close,
+             :interval_open, :newline, :one_or_more, :octal, :return,
+             :space, :tab, :vertical_tab, :zero_or_more, :zero_or_one
+          pass_through
+        when :literal
+          LiteralConverter.convert(data, self)
+        else
+          # Backspace, Bell, HexWide, Control, Meta, MetaControl, ...
+          warn_of_unsupported_feature
+        end
+      end
+    end
+  end
+end

data/lib/js_regex/converter/freespace_converter.rb ADDED Viewed

@@ -0,0 +1,16 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class FreespaceConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        '' # drop data without warning
+      end
+    end
+  end
+end

data/lib/js_regex/converter/group_converter.rb ADDED Viewed

@@ -0,0 +1,81 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class GroupConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        case subtype
+        when :atomic then open_atomic_group
+        when :capture, :passive then open_group
+        when :close then close_group
+        when :comment then '' # drop whole group w/o warning
+        when :named_ab, :named_sq then open_group('(') # drop name w/o warning
+        when :options then open_options_group
+        else
+          warn_of_unsupported_feature
+          open_group('(')
+        end
+      end
+      def open_atomic_group
+        # Atomicity is achieved with backreferenced lookahead groups:
+        # http://instanceof.me/post/52245507631
+        # regex-emulate-atomic-grouping-with-lookahead
+        context.group_level_for_backreference = context.group_level
+        context.group_number_for_backreference = context.opened_groups + 1
+        open_assertion('(?=(')
+      end
+      def open_options_group
+        warn_of_unsupported_feature('group-specific options')
+        open_group('(')
+      end
+      def open_group(group_head = pass_through)
+        context.group_level += 1
+        context.opened_groups += 1
+        group_head
+      end
+      def open_assertion(assertion_head = pass_through)
+        # these don't count as opened groups for backreference purposes
+        context.group_level += 1
+        assertion_head
+      end
+      def close_group
+        if context.negative_lookbehind
+          close_negative_lookbehind
+        else
+          context.group_level -= 1
+          if end_of_atomic_group?
+            close_atomic_group
+          else
+            ')'
+          end
+        end
+      end
+      def close_negative_lookbehind
+        context.negative_lookbehind = false
+        ''
+      end
+      def end_of_atomic_group?
+        return false unless context.group_level_for_backreference
+        context.group_level_for_backreference == context.group_level
+      end
+      def close_atomic_group
+        context.group_level_for_backreference = nil
+        # an empty passive group is appended in case literal digits follow
+        "))\\#{context.group_number_for_backreference}(?:)"
+      end
+    end
+  end
+end

data/lib/js_regex/converter/literal_converter.rb ADDED Viewed

@@ -0,0 +1,29 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class LiteralConverter < JsRegex::Converter::Base
+      def self.convert(data, converter)
+        utf8_data = data.dup.force_encoding('UTF-8')
+        if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
+          converter.send(:warn_of_unsupported_feature, 'astral plane character')
+        else
+          ensure_json_compatibility(utf8_data)
+        end
+      end
+      def self.ensure_json_compatibility(data)
+        data.gsub(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
+      end
+      private
+      def convert_data
+        self.class.convert(data, self)
+      end
+    end
+  end
+end

data/lib/js_regex/converter/meta_converter.rb ADDED Viewed

@@ -0,0 +1,28 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class MetaConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        case subtype
+        when :alternation
+          pass_through
+        when :dot
+          ruby_multiline_mode? ? '(?:.|\n)' : '.'
+        else
+          warn_of_unsupported_feature
+        end
+      end
+      def ruby_multiline_mode?
+        return false if @rb_mm == false
+        @rb_mm ||= target.ruby_regex.options & Regexp::MULTILINE > 0
+      end
+    end
+  end
+end

data/lib/js_regex/converter/nonproperty_converter.rb ADDED Viewed

@@ -0,0 +1,18 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'property_converter'
+    #
+    # Template class implementation.
+    #
+    # Note the inheritance from PropertyConverter.
+    #
+    class NonpropertyConverter < JsRegex::Converter::PropertyConverter
+      private
+      def convert_data
+        convert_property(true)
+      end
+    end
+  end
+end

data/lib/js_regex/converter/property_converter.rb ADDED Viewed

@@ -0,0 +1,40 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    require_relative File.join('..', 'property_map')
+    #
+    # Template class implementation.
+    #
+    class PropertyConverter < JsRegex::Converter::Base
+      def self.property_replacement(property_name, negated = false)
+        replacement = JsRegex::PROPERTY_MAP[property_name.downcase.to_sym]
+        negated ? negated_property_replacement(replacement) : replacement
+      end
+      def self.negated_property_replacement(property_string)
+        # take care not to use destructive methods on elements in the map
+        return nil unless property_string
+        if property_string.start_with?('[^')
+          property_string.sub('[^', '[')
+        elsif property_string.start_with?('[')
+          property_string.sub('[', '[^')
+        else
+          # it's an invertable meta char
+          property_string.swapcase
+        end
+      end
+      private
+      def convert_data
+        convert_property
+      end
+      def convert_property(negated = false)
+        replace = self.class.property_replacement(subtype, negated)
+        replace || warn_of_unsupported_feature
+      end
+    end
+  end
+end

data/lib/js_regex/converter/quantifier_converter.rb ADDED Viewed

@@ -0,0 +1,37 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    #
+    # Template class implementation.
+    #
+    class QuantifierConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        if multiplicative_interval?
+          warn_of_unsupported_feature('multiplicative interval \'{x}{x}\'')
+        else
+          context.previous_quantifier_subtype = subtype
+          context.previous_quantifier_end = end_index
+          convert_quantifier
+        end
+      end
+      def convert_quantifier
+        if data.length > 1 && data.end_with?('+')
+          warn_of_unsupported_feature('declaration of quantifier as possessive')
+          data[0..-2]
+        else
+          pass_through
+        end
+      end
+      def multiplicative_interval?
+        subtype == :interval &&
+          context.previous_quantifier_subtype == :interval &&
+          context.previous_quantifier_end == start_index
+      end
+    end
+  end
+end

data/lib/js_regex/converter/set_converter.rb ADDED Viewed

@@ -0,0 +1,137 @@
+class JsRegex
+  #
+  module Converter
+    require_relative 'base'
+    require_relative 'literal_converter'
+    require_relative 'property_converter'
+    require_relative 'type_converter'
+    #
+    # Template class implementation.
+    #
+    # This converter works a little differently from the others.
+    #
+    # It buffers anything that it finds within a set in the Context's
+    # #buffered_set_members and #buffered_set_extractions Arrays,
+    # returning an empty String for all passed tokens, and only when
+    # the set is closed does it compile and return the final String.
+    #
+    class SetConverter < JsRegex::Converter::Base
+      private
+      # Dispatches on the set token's subtype. Every branch returns a
+      # String; only the closing of the outermost set can return a
+      # non-empty one.
+      def convert_data
+        case subtype
+        when :open then convert_open_subtype
+        when :negate then convert_negate_subtype
+        when :close then convert_close_subtype
+        when :member, :range, :escape then convert_member_subtype
+        when /\Aclass_/ then convert_class_subtype
+        when /\Atype_/ then convert_type_subtype
+        when :intersection
+          warn_of_unsupported_feature("set intersection '&&'")
+        else
+          # TODO: I think it's a bug in Regexp::Scanner that some property
+          # tokens (only positive ones?) are returned with the token class
+          # :set within sets. If this is fixed, just
+          # warn_of_unsupported_feature here.
+          try_replacing_potential_property_subtype
+        end
+      end
+      def convert_open_subtype
+        context.open_set
+        ''
+      end
+      # Marks the current set level as negative. Negation of a nested set
+      # is reported as unsupported.
+      def convert_negate_subtype
+        if context.set_level > 1
+          warn_of_unsupported_feature('nested negative set data')
+        end
+        context.negate_set
+        ''
+      end
+      # Only the closing of the outermost set emits the compiled set.
+      def convert_close_subtype
+        context.close_set
+        context.set_level == 0 ? finalize_set : ''
+      end
+      # Buffers the member's literal conversion. An empty conversion is
+      # not buffered.
+      def convert_member_subtype
+        literal_conversion = LiteralConverter.convert(data, self)
+        return '' if literal_conversion == ''
+        buffer_set_member(literal_conversion)
+      end
+      # Handles :class_<name> and :class_non<name> subtypes. The name is
+      # what follows the 6 ('class_') or 9 ('class_non') prefix chars.
+      def convert_class_subtype
+        negated = subtype.to_s.start_with?('class_non')
+        name = subtype.to_s[(negated ? 9 : 6)..-1]
+        try_replacing_property(name, negated)
+      end
+      # Treats the subtype itself as a property name. A leading 'non'
+      # is taken as a negation and removed from the name.
+      def try_replacing_potential_property_subtype
+        negated = subtype.to_s.start_with?('non')
+        name = negated ? subtype.to_s[3..-1] : subtype.to_s
+        try_replacing_property(name, negated)
+      end
+      # Buffers the property's replacement as an extraction, or warns if
+      # PropertyConverter does not return one.
+      def try_replacing_property(name, negated)
+        replacement = PropertyConverter.property_replacement(name, negated)
+        if replacement
+          buffer_set_extraction(replacement)
+        else
+          warn_of_unsupported_feature
+        end
+      end
+      # Hex types are buffered as extractions using TypeConverter's
+      # expansions. All other types are buffered as members, as they are.
+      def convert_type_subtype
+        if subtype == :type_hex
+          buffer_set_extraction(TypeConverter::HEX_EXPANSION)
+        elsif subtype == :type_nonhex
+          buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
+        else
+          buffer_set_member(data)
+        end
+      end
+      # Content of negated nested sets is not buffered, i.e. it is left
+      # out of the final set.
+      def buffer_set_member(string)
+        buffered_members << string unless context.nested_negation?
+        ''
+      end
+      # Same as #buffer_set_member, but for extractions.
+      def buffer_set_extraction(string)
+        buffered_extractions << string unless context.nested_negation?
+        ''
+      end
+      def buffered_members
+        context.buffered_set_members
+      end
+      def buffered_extractions
+        context.buffered_set_extractions
+      end
+      # Compiles the buffered data into the final String. The members form
+      # a set that is negative if level 1 was negated. If there are
+      # extractions as well, the set and the extractions become the
+      # alternatives of a passive group.
+      def finalize_set
+        if buffered_members.none?
+          finalize_depleted_set
+        else
+          set = build_set(buffered_members, context.negative_set?(1))
+          if buffered_extractions.any?
+            "(?:#{set}|#{buffered_extractions.join('|')})"
+          else
+            set
+          end
+        end
+      end
+      # Used when there are no members. Returns an empty String, the only
+      # extraction, or a passive group with all extractions as alternatives.
+      def finalize_depleted_set
+        case buffered_extractions.count
+        when 0 then ''
+        when 1 then buffered_extractions.first
+        else "(?:#{buffered_extractions.join('|')})" end
+      end
+      def build_set(members, negative)
+        "[#{negative ? '^' : ''}#{members.join}]"
+      end
+    end
+  end
+end