RubyGems - js_regex - Versions diffs - 3.7.2 → 3.8.0 - Mend

js_regex 3.7.2 → 3.8.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (24)

checksums.yaml +4 -4
data/lib/js_regex/conversion.rb +16 -11
data/lib/js_regex/converter/anchor_converter.rb +28 -6
data/lib/js_regex/converter/assertion_converter.rb +15 -4
data/lib/js_regex/converter/backreference_converter.rb +23 -6
data/lib/js_regex/converter/base.rb +10 -6
data/lib/js_regex/converter/conditional_converter.rb +2 -2
data/lib/js_regex/converter/context.rb +28 -2
data/lib/js_regex/converter/escape_converter.rb +11 -3
data/lib/js_regex/converter/group_converter.rb +14 -8
data/lib/js_regex/converter/keep_converter.rb +24 -0
data/lib/js_regex/converter/literal_converter.rb +8 -4
data/lib/js_regex/converter/meta_converter.rb +10 -0
data/lib/js_regex/converter/property_converter.rb +15 -0
data/lib/js_regex/converter/property_map.csv +171 -0
data/lib/js_regex/converter/set_converter.rb +8 -3
data/lib/js_regex/converter/type_converter.rb +23 -5
data/lib/js_regex/converter.rb +1 -0
data/lib/js_regex/node.rb +4 -2
data/lib/js_regex/second_pass.rb +41 -13
data/lib/js_regex/target.rb +19 -0
data/lib/js_regex/version.rb +1 -1
data/lib/js_regex.rb +5 -5
metadata +5 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3ec1645dcb85514957cbc0d1d08d973c44386e7ae554b83ceb56be807178a08d
-  data.tar.gz: cebf15dc34ca2e2f6ed1e57cc9a4719ee79c652070d8d0b687dae9278ed18e8d
+  metadata.gz: 8971658980813740deb03ece3c5af5bfdfd7412f0630fc4a3f172e4c06b11c52
+  data.tar.gz: bdafa3639a230b1ec1ac4661828050d99339eb18ec1768fa2a6f1b5e69d95f1b
 SHA512:
-  metadata.gz: 74eb573819cf814ca8c196dae4a6ddbb139b941947197cb115153ec749a89fe5189b1296693f86596ded4a882f81858b877b1514ff346289510dce351df8224f
-  data.tar.gz: 07e5743c335d15bcadfd465df576e32a5b9c6c27918b16c0282f1ac19ecf5ba7634fdf6b9a528963742316ce1a15f174098934da61d2c0a8975f639e1827a4b0
+  metadata.gz: 13abdf7b41485194f05cce79751ca60e8b1f9fc864b17f58294650df9f9e485a889b9571d847bf564aa12b709fd572c53f27a5a2900e3dc8bfa765f522b58e62
+  data.tar.gz: 31941c0d7a4842fdea84d5f649f3df30c54e8da24e6dbf722cff3e48661ae89646117f66f33ee419c0fc0e393b11f284cb565751b4f77aa5eb0bbbc8d38d903d

data/lib/js_regex/conversion.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 class JsRegex
   #
-  # This class acts as a facade, passing a regex to the converters.
+  # This class acts as a facade, passing a Regexp to the Converters.
   #
-  # ::of returns a source String, options String, and warnings Array.
+  # ::of returns a source String, options String, warnings Array, target String.
   #
   class Conversion
     require 'regexp_parser'
@@ -10,28 +10,33 @@ class JsRegex
     require_relative 'error'
     require_relative 'node'
     require_relative 'second_pass'
+    require_relative 'target'
     class << self
-      def of(input, options: nil)
-        source, warnings = convert_source(input)
-        options_string   = convert_options(input, options)
-        [source, options_string, warnings]
+      def of(input, options: nil, target: Target::ES2009)
+        target                       = Target.cast(target)
+        source, warnings, extra_opts = convert_source(input, target)
+        options_string               = convert_options(input, options, extra_opts)
+        [source, options_string, warnings, target]
       end
       private
-      def convert_source(input)
+      def convert_source(input, target)
         tree = Regexp::Parser.parse(input)
-        context = Converter::Context.new(case_insensitive_root: tree.i?)
+        context = Converter::Context.new(
+          case_insensitive_root: tree.i?,
+          target:                target,
+        )
         converted_tree = Converter.convert(tree, context)
         final_tree = SecondPass.call(converted_tree)
-        [final_tree.to_s, context.warnings]
+        [final_tree.to_s, context.warnings, context.required_options]
       rescue Regexp::Parser::Error => e
         raise e.extend(JsRegex::Error)
       end
-      def convert_options(input, custom_options)
-        options = custom_options.to_s.scan(/[gimuy]/)
+      def convert_options(input, custom_options, required_options)
+        options = custom_options.to_s.scan(/[gimsuy]/) + required_options
         if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
           options << 'i'
         end

data/lib/js_regex/converter/anchor_converter.rb CHANGED Viewed

@@ -13,17 +13,39 @@ class JsRegex
         when :bol, :bos then '^'
         when :eol, :eos then '$'
         when :eos_ob_eol then '(?=\n?$)'
-        when :word_boundary then pass_boundary_with_warning('\b')
-        when :nonword_boundary then pass_boundary_with_warning('\B')
+        when :word_boundary then convert_boundary
+        when :nonword_boundary then convert_nonboundary
         else
           warn_of_unsupported_feature
         end
       end
-      def pass_boundary_with_warning(boundary)
-        warn_of("The anchor '#{boundary}' at index #{expression.ts} "\
-                'only works at ASCII word boundaries in JavaScript.')
-        boundary
+      def convert_boundary
+        if context.es_2018_or_higher? && context.enable_u_option
+          BOUNDARY_EXPANSION
+        else
+          pass_boundary_with_warning
+        end
+      end
+      def convert_nonboundary
+        if context.es_2018_or_higher? && context.enable_u_option
+          NONBOUNDARY_EXPANSION
+        else
+          pass_boundary_with_warning
+        end
+      end
+      # This is an approximation to the word boundary behavior in Ruby, c.f.
+      # https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130
+      W                     = '\d\p{L}\p{M}\p{Pc}'
+      BOUNDARY_EXPANSION    = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))"
+      NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])"
+      def pass_boundary_with_warning
+        warn_of("The anchor '#{data}' at index #{expression.ts} only works "\
+                'at ASCII word boundaries with targets below ES2018".')
+        pass_through
       end
     end
   end

data/lib/js_regex/converter/assertion_converter.rb CHANGED Viewed

@@ -14,13 +14,24 @@ class JsRegex
       def convert_data
         case subtype
         when :lookahead, :nlookahead
-          build_group(head: pass_through, capturing: false)
+          keep_as_is
+        when :lookbehind
+          return keep_as_is if context.es_2018_or_higher?
+          warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018)
+          build_passive_group
         when :nlookbehind
-          warn_of_unsupported_feature('negative lookbehind assertion')
-        else # :lookbehind, ...
-          build_unsupported_group
+          return keep_as_is if context.es_2018_or_higher?
+          warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018)
+        else
+          warn_of_unsupported_feature
         end
       end
+      def keep_as_is
+        build_group(head: pass_through, capturing: false)
+      end
     end
   end
 end

data/lib/js_regex/converter/backreference_converter.rb CHANGED Viewed

@@ -10,16 +10,30 @@ class JsRegex
       def convert_data
         case subtype
-        when :name_ref, :number, :number_ref, :number_rel_ref then convert_ref
-        when :name_call, :number_call, :number_rel_call       then convert_call
+        when :name_ref then convert_name_ref
+        when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref
+        when :name_call, :number_call, :number_rel_call then convert_call
         else # name_recursion_ref, number_recursion_ref, ...
           warn_of_unsupported_feature
         end
       end
-      def convert_ref
-        position = context.new_capturing_group_position(target_position)
-        Node.new('\\', Node.new(position.to_s, type: :backref_num))
+      def convert_name_ref
+        if context.es_2018_or_higher?
+          # ES 2018+ supports named backrefs, but only the angled-bracket syntax
+          Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref)
+        else
+          convert_to_plain_num_ref
+        end
+      end
+      def convert_to_plain_num_ref
+        position = new_position
+        Node.new("\\#{position}", reference: position, type: :backref)
+      end
+      def new_position
+        context.new_capturing_group_position(target_position)
       end
       def target_position
@@ -31,7 +45,10 @@ class JsRegex
           return warn_of_unsupported_feature('whole-pattern recursion')
         end
         context.increment_local_capturing_group_count
-        convert_expression(expression.referenced_expression.unquantified_clone)
+        target_copy = expression.referenced_expression.unquantified_clone
+        # avoid "Duplicate capture group name" error in JS
+        target_copy.token = :capture if target_copy.is?(:named, :group)
+        convert_expression(target_copy)
       end
     end
   end

data/lib/js_regex/converter/base.rb CHANGED Viewed

@@ -51,10 +51,14 @@ class JsRegex
         Converter.convert(expression, context)
       end
-      def warn_of_unsupported_feature(description = nil)
+      def warn_of_unsupported_feature(description = nil, min_target: nil)
         description ||= "#{subtype} #{expression.type}".tr('_', ' ')
-        full_desc = "#{description} '#{expression}'"
-        warn_of("Dropped unsupported #{full_desc} at index #{expression.ts}")
+        full_text = "Dropped unsupported #{description} '#{expression}' "\
+                    "at index #{expression.ts}"
+        if min_target
+          full_text += " (requires at least `target: '#{min_target}'`)"
+        end
+        warn_of(full_text)
         drop
       end
@@ -68,11 +72,11 @@ class JsRegex
       alias drop_without_warning drop
       def wrap_in_backrefed_lookahead(content)
-        backref_num = context.capturing_group_count + 1
-        backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
+        number = context.capturing_group_count + 1
+        backref_node = Node.new("\\#{number}", reference: number, type: :backref)
         context.increment_local_capturing_group_count
         # an empty passive group (?:) is appended as literal digits may follow
-        Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
+        Node.new('(?=(', *content, '))', backref_node, '(?:)')
       end
     end
   end

data/lib/js_regex/converter/conditional_converter.rb CHANGED Viewed

@@ -10,12 +10,12 @@ class JsRegex
       def convert_data
         case subtype
-        when :open then mark_conditional
+        when :open then mark_conditional_for_second_pass
         else warn_of_unsupported_feature
         end
       end
-      def mark_conditional
+      def mark_conditional_for_second_pass
         reference = expression.referenced_expression.number
         node = Node.new('(?:', reference: reference, type: :conditional)
         expression.branches.each do |branch|

data/lib/js_regex/converter/context.rb CHANGED Viewed

@@ -11,12 +11,36 @@ class JsRegex
                   :in_atomic_group,
                   :warnings
-      def initialize(case_insensitive_root: false)
+      def initialize(case_insensitive_root: false, target: nil)
         self.added_capturing_groups_after_group = Hash.new(0)
         self.capturing_group_count = 0
         self.warnings = []
+        self.required_options_hash = {}
         self.case_insensitive_root = case_insensitive_root
+        self.target = target
+      end
+      # target context
+      def es_2015_or_higher?
+        target >= Target::ES2015
+      end
+      def es_2018_or_higher?
+        target >= Target::ES2018
+      end
+      # these methods allow appending options to the final Conversion output
+      def enable_u_option
+        return false unless es_2015_or_higher?
+        required_options_hash['u'] = true
+      end
+      def required_options
+        required_options_hash.keys
       end
       # group context
@@ -54,7 +78,9 @@ class JsRegex
       private
-      attr_accessor :added_capturing_groups_after_group
+      attr_accessor :added_capturing_groups_after_group,
+                    :required_options_hash,
+                    :target
       attr_writer :capturing_group_count,
                   :case_insensitive_root,

data/lib/js_regex/converter/escape_converter.rb CHANGED Viewed

@@ -41,7 +41,7 @@ class JsRegex
         when :control, :meta_sequence
           unicode_escape_codepoint
         when :literal
-          LiteralConverter.convert_data(expression.char)
+          LiteralConverter.convert_data(expression.char, context)
         when *ESCAPES_SHARED_BY_RUBY_AND_JS
           pass_through
         when :bell, :escape, :octal
@@ -52,11 +52,19 @@ class JsRegex
       end
       def convert_codepoint_list
-        expression.chars.each_with_object(Node.new) do |char, node|
-          node << LiteralConverter.convert_data(Regexp.escape(char))
+        if context.enable_u_option
+          split_codepoint_list
+        else
+          expression.chars.each_with_object(Node.new) do |char, node|
+            node << LiteralConverter.convert_data(Regexp.escape(char), context)
+          end
         end
       end
+      def split_codepoint_list
+        expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join
+      end
       def unicode_escape_codepoint
         "\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
       end

data/lib/js_regex/converter/group_converter.rb CHANGED Viewed

@@ -10,19 +10,30 @@ class JsRegex
       def convert_data
         case subtype
-        when :capture, :named then build_group
+        when :capture then build_group
+        when :named then build_named_group
         when :atomic then emulate_atomic_group
         when :comment then drop_without_warning
         when :options, :options_switch then build_options_group
         when :passive then build_passive_group
         when :absence then build_absence_group_if_simple
-        else build_unsupported_group
+        else warn_of_unsupported_feature
+        end
+      end
+      def build_named_group
+        if context.es_2018_or_higher?
+          # ES 2018+ supports named groups, but only the angled-bracket syntax
+          build_group(head: "(?<#{expression.name}>")
+        else
+          build_group
         end
       end
       def emulate_atomic_group
         if context.in_atomic_group
-          build_unsupported_group('nested atomic group')
+          warn_of_unsupported_feature('nested atomic group')
+          build_passive_group
         else
           context.start_atomic_group
           result = wrap_in_backrefed_lookahead(convert_subexpressions)
@@ -68,11 +79,6 @@ class JsRegex
         build_group(head: head, tail: tail, capturing: false)
       end
-      def build_unsupported_group(description = nil)
-        warn_of_unsupported_feature(description)
-        build_passive_group
-      end
       def build_group(opts = {})
         head = opts[:head] || '('
         tail = opts[:tail] || ')'

data/lib/js_regex/converter/keep_converter.rb ADDED Viewed

@@ -0,0 +1,24 @@
+require_relative 'base'
+class JsRegex
+  module Converter
+    #
+    # Template class implementation.
+    #
+    class KeepConverter < JsRegex::Converter::Base
+      private
+      def convert_data
+        if context.es_2018_or_higher?
+          if expression.level.zero?
+            Node.new(type: :keep_mark) # mark for conversion in SecondPass
+          else
+            warn_of_unsupported_feature('nested keep mark')
+          end
+        else
+          warn_of_unsupported_feature('keep mark', min_target: Target::ES2018)
+        end
+      end
+    end
+  end
+end

data/lib/js_regex/converter/literal_converter.rb CHANGED Viewed

@@ -7,11 +7,15 @@ class JsRegex
     #
     class LiteralConverter < JsRegex::Converter::Base
       class << self
-        ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
+        ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
-        def convert_data(data)
+        def convert_data(data, context)
           if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
-            convert_astral_data(data)
+            if context.enable_u_option
+              escape_incompatible_bmp_literals(data)
+            else
+              convert_astral_data(data)
+            end
           else
             escape_incompatible_bmp_literals(data)
           end
@@ -41,7 +45,7 @@ class JsRegex
       private
       def convert_data
-        result = self.class.convert_data(data)
+        result = self.class.convert_data(data, context)
         if context.case_insensitive_root && !expression.case_insensitive?
           warn_of_unsupported_feature('nested case-sensitive literal')
         elsif !context.case_insensitive_root && expression.case_insensitive?

data/lib/js_regex/converter/meta_converter.rb CHANGED Viewed

@@ -8,6 +8,16 @@ class JsRegex
     class MetaConverter < JsRegex::Converter::Base
       DOT_EXPANSION    = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])'
       ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])'
+      # Possible improvements for dot conversion:
+      #
+      # In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately
+      # the dot keeps matching lone surrogates even with this flag, so the use
+      # of an expansion is still necessary to get the same behavior as in Ruby.
+      #
+      # ES2018 has the dotall flag 's', but it is tricky to use in conversions.
+      # 's' activates matching of BOTH astral chars and "\n", whereas the dot in
+      # Ruby doesn't match "\n" by default, and even with the 'm' flag set on
+      # the root, subexps might still exclude "\n" like so: /.(?-m:.)./m
       private

data/lib/js_regex/converter/property_converter.rb CHANGED Viewed

@@ -10,9 +10,24 @@ class JsRegex
     # codepoints matched by the property and build a set string from them.
     #
     class PropertyConverter < JsRegex::Converter::Base
+      # A map of normalized Ruby property names to names supported by ES2018+.
+      def self.map
+        @map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h
+      end
       private
       def convert_data
+        if context.es_2018_or_higher? &&
+            (prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')])
+          context.enable_u_option
+          "\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}"
+        else
+          build_character_set
+        end
+      end
+      def build_character_set
         content = CharacterSet.of_expression(expression)
         if expression.case_insensitive? && !context.case_insensitive_root

data/lib/js_regex/converter/property_map.csv ADDED Viewed

@@ -0,0 +1,171 @@
+# THIS FILE IS GENERATED BY $ rake build_prop_map - DO NOT EDIT
+ascii,ASCII
+asciihexdigit,ASCII_Hex_Digit
+adlam,Script=Adlam
+anatolianhieroglyphs,Script=Anatolian_Hieroglyphs
+armenian,Script=Armenian
+avestan,Script=Avestan
+bamum,Script=Bamum
+bassavah,Script=Bassa_Vah
+batak,Script=Batak
+bengali,Script=Bengali
+bhaiksuki,Script=Bhaiksuki
+bidicontrol,Bidi_Control
+bopomofo,Script=Bopomofo
+braille,Script=Braille
+buginese,Script=Buginese
+buhid,Script=Buhid
+carian,Script=Carian
+caucasianalbanian,Script=Caucasian_Albanian
+chakma,Script=Chakma
+cham,Script=Cham
+cherokee,Script=Cherokee
+chorasmian,Script=Chorasmian
+connectorpunctuation,Connector_Punctuation
+control,Control
+coptic,Script=Coptic
+cuneiform,Script=Cuneiform
+cypriot,Script=Cypriot
+cyrillic,Script=Cyrillic
+deprecated,Deprecated
+deseret,Script=Deseret
+devanagari,Script=Devanagari
+divesakuru,Script=Dives_Akuru
+dogra,Script=Dogra
+duployan,Script=Duployan
+egyptianhieroglyphs,Script=Egyptian_Hieroglyphs
+elbasan,Script=Elbasan
+elymaic,Script=Elymaic
+emojicomponent,Emoji_Component
+emojimodifier,Emoji_Modifier
+enclosingmark,Enclosing_Mark
+finalpunctuation,Final_Punctuation
+georgian,Script=Georgian
+gothic,Script=Gothic
+grantha,Script=Grantha
+greek,Script=Greek
+gujarati,Script=Gujarati
+gunjalagondi,Script=Gunjala_Gondi
+gurmukhi,Script=Gurmukhi
+hangul,Script=Hangul
+hanifirohingya,Script=Hanifi_Rohingya
+hanunoo,Script=Hanunoo
+hatran,Script=Hatran
+hebrew,Script=Hebrew
+hexdigit,Hex_Digit
+idsbinaryoperator,IDS_Binary_Operator
+idstrinaryoperator,IDS_Trinary_Operator
+imperialaramaic,Script=Imperial_Aramaic
+initialpunctuation,Initial_Punctuation
+inscriptionalpahlavi,Script=Inscriptional_Pahlavi
+inscriptionalparthian,Script=Inscriptional_Parthian
+javanese,Script=Javanese
+joincontrol,Join_Control
+kayahli,Script=Kayah_Li
+kharoshthi,Script=Kharoshthi
+khitansmallscript,Script=Khitan_Small_Script
+khmer,Script=Khmer
+khojki,Script=Khojki
+khudawadi,Script=Khudawadi
+lao,Script=Lao
+lepcha,Script=Lepcha
+letternumber,Letter_Number
+limbu,Script=Limbu
+lineseparator,Line_Separator
+lineara,Script=Linear_A
+linearb,Script=Linear_B
+lisu,Script=Lisu
+logicalorderexception,Logical_Order_Exception
+lycian,Script=Lycian
+lydian,Script=Lydian
+mahajani,Script=Mahajani
+makasar,Script=Makasar
+malayalam,Script=Malayalam
+mandaic,Script=Mandaic
+manichaean,Script=Manichaean
+marchen,Script=Marchen
+masaramgondi,Script=Masaram_Gondi
+math,Math
+mathsymbol,Math_Symbol
+medefaidrin,Script=Medefaidrin
+meeteimayek,Script=Meetei_Mayek
+mendekikakui,Script=Mende_Kikakui
+meroiticcursive,Script=Meroitic_Cursive
+meroitichieroglyphs,Script=Meroitic_Hieroglyphs
+miao,Script=Miao
+modi,Script=Modi
+mro,Script=Mro
+multani,Script=Multani
+myanmar,Script=Myanmar
+nabataean,Script=Nabataean
+nandinagari,Script=Nandinagari
+newtailue,Script=New_Tai_Lue
+newa,Script=Newa
+nko,Script=Nko
+noncharactercodepoint,Noncharacter_Code_Point
+nushu,Script=Nushu
+nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong
+ogham,Script=Ogham
+olchiki,Script=Ol_Chiki
+oldhungarian,Script=Old_Hungarian
+olditalic,Script=Old_Italic
+oldnortharabian,Script=Old_North_Arabian
+oldpermic,Script=Old_Permic
+oldpersian,Script=Old_Persian
+oldsogdian,Script=Old_Sogdian
+oldsoutharabian,Script=Old_South_Arabian
+oldturkic,Script=Old_Turkic
+oriya,Script=Oriya
+osage,Script=Osage
+osmanya,Script=Osmanya
+othernumber,Other_Number
+pahawhhmong,Script=Pahawh_Hmong
+palmyrene,Script=Palmyrene
+paragraphseparator,Paragraph_Separator
+patternsyntax,Pattern_Syntax
+patternwhitespace,Pattern_White_Space
+paucinhau,Script=Pau_Cin_Hau
+phagspa,Script=Phags_Pa
+phoenician,Script=Phoenician
+privateuse,Private_Use
+psalterpahlavi,Script=Psalter_Pahlavi
+quotationmark,Quotation_Mark
+radical,Radical
+regionalindicator,Regional_Indicator
+rejang,Script=Rejang
+runic,Script=Runic
+samaritan,Script=Samaritan
+saurashtra,Script=Saurashtra
+separator,Separator
+sharada,Script=Sharada
+shavian,Script=Shavian
+siddham,Script=Siddham
+signwriting,Script=SignWriting
+sinhala,Script=Sinhala
+sogdian,Script=Sogdian
+sorasompeng,Script=Sora_Sompeng
+soyombo,Script=Soyombo
+spaceseparator,Space_Separator
+sundanese,Script=Sundanese
+sylotinagri,Script=Syloti_Nagri
+syriac,Script=Syriac
+tagbanwa,Script=Tagbanwa
+taile,Script=Tai_Le
+taitham,Script=Tai_Tham
+taiviet,Script=Tai_Viet
+tamil,Script=Tamil
+tangut,Script=Tangut
+thaana,Script=Thaana
+thai,Script=Thai
+tibetan,Script=Tibetan
+tifinagh,Script=Tifinagh
+tirhuta,Script=Tirhuta
+titlecaseletter,Titlecase_Letter
+ugaritic,Script=Ugaritic
+vai,Script=Vai
+wancho,Script=Wancho
+warangciti,Script=Warang_Citi
+whitespace,White_Space
+yezidi,Script=Yezidi
+yi,Script=Yi
+zanabazarsquare,Script=Zanabazar_Square

data/lib/js_regex/converter/set_converter.rb CHANGED Viewed

@@ -26,7 +26,12 @@ class JsRegex
           warn_of_unsupported_feature('nested case-sensitive set')
         end
-        content.to_s_with_surrogate_ranges
+        if context.es_2015_or_higher?
+          context.enable_u_option if content.astral_part?
+          content.to_s(format: 'es6', in_brackets: true)
+        else
+          content.to_s_with_surrogate_ranges
+        end
       end
       def directly_compatible?
@@ -41,8 +46,8 @@ class JsRegex
       def child_directly_compatible?(exp)
         case exp.type
         when :literal
-          # surrogate pair substitution needed if astral
-          exp.text.ord <= 0xFFFF
+          # surrogate pair substitution needed on ES2009 if astral
+          exp.text.ord <= 0xFFFF || context.enable_u_option
         when :set
           # conversion needed for nested sets, intersections
           exp.token.equal?(:range)

data/lib/js_regex/converter/type_converter.rb CHANGED Viewed

@@ -6,9 +6,11 @@ class JsRegex
     # Template class implementation.
     #
     class TypeConverter < JsRegex::Converter::Base
-      HEX_EXPANSION       = '[0-9A-Fa-f]'
-      NONHEX_EXPANSION    = '[^0-9A-Fa-f]'
-      LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
+      HEX_EXPANSION           = '[0-9A-Fa-f]'
+      NONHEX_EXPANSION        = '[^0-9A-Fa-f]'
+      ES2018_HEX_EXPANSION    = '\p{AHex}'
+      ES2018_NONHEX_EXPANSION = '\P{AHex}'
+      LINEBREAK_EXPANSION     = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
       def self.directly_compatible?(expression)
         case expression.token
@@ -23,8 +25,8 @@ class JsRegex
       def convert_data
         case subtype
-        when :hex then HEX_EXPANSION
-        when :nonhex then NONHEX_EXPANSION
+        when :hex then hex_expansion
+        when :nonhex then nonhex_expansion
         when :linebreak then LINEBREAK_EXPANSION
         when :digit, :space, :word
           return pass_through if self.class.directly_compatible?(expression)
@@ -37,6 +39,22 @@ class JsRegex
         end
       end
+      def hex_expansion
+        if context.es_2018_or_higher? && context.enable_u_option
+          ES2018_HEX_EXPANSION
+        else
+          HEX_EXPANSION
+        end
+      end
+      def nonhex_expansion
+        if context.es_2018_or_higher? && context.enable_u_option
+          ES2018_NONHEX_EXPANSION
+        else
+          NONHEX_EXPANSION
+        end
+      end
       def negative_set_substitution
         # ::of_expression returns an inverted set for negative expressions,
         # so we need to un-invert before wrapping in [^ and ]. Kinda lame.

data/lib/js_regex/converter.rb CHANGED Viewed

@@ -13,6 +13,7 @@ class JsRegex
       expression:  SubexpressionConverter,
       free_space:  FreespaceConverter,
       group:       GroupConverter,
+      keep:        KeepConverter,
       literal:     LiteralConverter,
       meta:        MetaConverter,
       nonproperty: PropertyConverter,

data/lib/js_regex/node.rb CHANGED Viewed

@@ -9,10 +9,11 @@ class JsRegex
     attr_reader :children, :quantifier, :reference, :type
     TYPES = %i[
-      backref_num
+      backref
       captured_group
       conditional
       dropped
+      keep_mark
       plain
     ].freeze
@@ -46,7 +47,7 @@ class JsRegex
       case type
       when :dropped
         ''
-      when :backref_num, :captured_group, :plain
+      when :backref, :captured_group, :plain
         children.join << quantifier.to_s
       else
         raise TypeError.new(
@@ -59,6 +60,7 @@ class JsRegex
       self.children   = attrs.fetch(:children)   if attrs.key?(:children)
       self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
       self.type       = attrs.fetch(:type)       if attrs.key?(:type)
+      self
     end
     private

data/lib/js_regex/second_pass.rb CHANGED Viewed

@@ -6,12 +6,26 @@ class JsRegex
   module SecondPass
     class << self
       def call(tree)
+        substitute_root_level_keep_mark(tree)
         alternate_conditional_permutations(tree)
         tree
       end
       private
+      def substitute_root_level_keep_mark(tree)
+        keep_mark_index = nil
+        tree.children.each.with_index do |child, i|
+          break keep_mark_index = i if child.type == :keep_mark
+        end
+        return unless keep_mark_index
+        pre = tree.children[0...keep_mark_index]
+        post = tree.children[(keep_mark_index + 1)..-1]
+        lookbehind = Node.new('(?<=', *pre, ')')
+        tree.update(children: [lookbehind, *post])
+      end
       def alternate_conditional_permutations(tree)
         permutations = conditional_tree_permutations(tree)
         return if permutations.empty?
@@ -23,16 +37,16 @@ class JsRegex
       end
       def conditional_tree_permutations(tree)
-        all_conds = conditions(tree)
-        return [] if all_conds.empty?
+        conds = conditions(tree)
+        return [] if conds.empty?
         caps_per_branch = captured_group_count(tree)
-        condition_permutations(all_conds).map.with_index do |truthy_conds, i|
+        condition_permutations(conds).map.with_index do |truthy_conds, i|
           tree_permutation = tree.clone
           # find referenced groups and conditionals and make one-sided
           crawl(tree_permutation) do |node|
-            build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
+            build_permutation(node, conds, truthy_conds, caps_per_branch, i)
           end
         end
       end
@@ -63,16 +77,30 @@ class JsRegex
         end
       end
-      def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
+      def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
         truthy = truthy_conds.include?(node.reference)
-        if node.type.equal?(:captured_group) &&
-          all_conds.include?(node.reference)
-          adapt_referenced_group_to_permutation(node, truthy)
-        elsif node.type.equal?(:conditional)
-          adapt_conditional_to_permutation(node, truthy)
-        elsif node.type.equal?(:backref_num)
+        case node.type
+        when :backref
+          # We cannot use named groups or backrefs in the conditional expansion,
+          # their repetition would cause a "Duplicate capture group name" error in JS.
+          node.update(children: [
+            node.children.first.sub(/k<.*>/, node.reference.to_s)
+          ])
+          # backref numbers need to be incremented for subsequent "branches"
           adapt_backref_to_permutation(node, caps_per_branch, i)
+        when :captured_group
+          # Remove name, c.f. :backref handling.
+          node.update(children: [
+            node.children.first.sub(/\?<.*>/, ''),
+            *node.children[1..-1]
+          ])
+          # if the group is referenced by any condition, modulate its quantity
+          if conds.include?(node.reference)
+            adapt_referenced_group_to_permutation(node, truthy)
+          end
+        when :conditional
+          adapt_conditional_to_permutation(node, truthy)
         end
       end
@@ -91,8 +119,8 @@ class JsRegex
       end
       def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
-        new_num = backref_node.children[0].to_i + caps_per_branch * i
-        backref_node.update(children: [new_num.to_s])
+        new_num = backref_node.reference + caps_per_branch * i
+        backref_node.update(children: ["\\#{new_num}"])
       end
       def min_quantify(node)

data/lib/js_regex/target.rb ADDED Viewed

@@ -0,0 +1,19 @@
+class JsRegex
+  module Target
+    ES2009 = 'ES2009'
+    ES2015 = 'ES2015'
+    ES2018 = 'ES2018'
+    SUPPORTED = [ES2009, ES2015, ES2018].freeze
+    def self.cast(arg)
+      return ES2009 if arg.nil?
+      normalized_arg = arg.to_s.upcase
+      return normalized_arg if SUPPORTED.include?(normalized_arg)
+      raise ArgumentError.new(
+        "Unknown target: #{arg.inspect}. Try one of #{SUPPORTED}."
+      ).extend(JsRegex::Error)
+    end
+  end
+end

data/lib/js_regex/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class JsRegex
-  VERSION = '3.7.2'
+  VERSION = '3.8.0'
 end

data/lib/js_regex.rb CHANGED Viewed

@@ -12,10 +12,10 @@ class JsRegex
   require_relative File.join('js_regex', 'version')
   require 'json'
-  attr_reader :source, :options, :warnings
+  attr_reader :source, :options, :warnings, :target
-  def initialize(ruby_regex, options: nil)
-    @source, @options, @warnings = Conversion.of(ruby_regex, options: options)
+  def initialize(ruby_regex, **kwargs)
+    @source, @options, @warnings, @target = Conversion.of(ruby_regex, **kwargs)
   end
   def to_h
@@ -30,8 +30,8 @@ class JsRegex
     "/#{source.empty? ? '(?:)' : source}/#{options}"
   end
-  def self.new!(ruby_regex, options: nil)
-    js_regex = new(ruby_regex, options: options)
+  def self.new!(ruby_regex, **kwargs)
+    js_regex = new(ruby_regex, **kwargs)
     if js_regex.warnings.any?
       raise StandardError.new(
         "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: js_regex
 version: !ruby/object:Gem::Version
-  version: 3.7.2
+  version: 3.8.0
 platform: ruby
 authors:
 - Janosch Müller
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-27 00:00:00.000000000 Z
+date: 2022-09-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: character_set
@@ -72,9 +72,11 @@ files:
 - lib/js_regex/converter/escape_converter.rb
 - lib/js_regex/converter/freespace_converter.rb
 - lib/js_regex/converter/group_converter.rb
+- lib/js_regex/converter/keep_converter.rb
 - lib/js_regex/converter/literal_converter.rb
 - lib/js_regex/converter/meta_converter.rb
 - lib/js_regex/converter/property_converter.rb
+- lib/js_regex/converter/property_map.csv
 - lib/js_regex/converter/set_converter.rb
 - lib/js_regex/converter/subexpression_converter.rb
 - lib/js_regex/converter/type_converter.rb
@@ -82,6 +84,7 @@ files:
 - lib/js_regex/error.rb
 - lib/js_regex/node.rb
 - lib/js_regex/second_pass.rb
+- lib/js_regex/target.rb
 - lib/js_regex/version.rb
 homepage: https://github.com/jaynetics/js_regex
 licenses: