RubyGems - regexp_parser - Versions diffs - 2.1.1 → 2.2.0 - Mend

regexp_parser 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/README.md +15 -21
data/Rakefile +5 -11
data/lib/regexp_parser/expression/base.rb +123 -0
data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
data/lib/regexp_parser/expression/classes/literal.rb +1 -5
data/lib/regexp_parser/expression/classes/property.rb +0 -2
data/lib/regexp_parser/expression/classes/root.rb +0 -1
data/lib/regexp_parser/expression/classes/type.rb +0 -2
data/lib/regexp_parser/expression/quantifier.rb +1 -1
data/lib/regexp_parser/expression/sequence.rb +0 -1
data/lib/regexp_parser/expression/subexpression.rb +0 -1
data/lib/regexp_parser/expression.rb +6 -130
data/lib/regexp_parser/lexer.rb +7 -5
data/lib/regexp_parser/scanner/properties/long.yml +13 -0
data/lib/regexp_parser/scanner/properties/short.yml +9 -1
data/lib/regexp_parser/syntax/any.rb +1 -3
data/lib/regexp_parser/syntax/base.rb +9 -9
data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
data/lib/regexp_parser/syntax/token/escape.rb +31 -0
data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
data/lib/regexp_parser/syntax/token.rb +45 -0
data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
data/lib/regexp_parser/syntax.rb +1 -1
data/lib/regexp_parser/token.rb +9 -20
data/lib/regexp_parser/version.rb +1 -1
data/lib/regexp_parser.rb +0 -2
data/spec/lexer/nesting_spec.rb +2 -2
data/spec/parser/escapes_spec.rb +43 -31
data/spec/parser/properties_spec.rb +6 -4
data/spec/parser/set/ranges_spec.rb +26 -16
data/spec/scanner/escapes_spec.rb +28 -19
data/spec/scanner/sets_spec.rb +9 -9
data/spec/spec_helper.rb +13 -1
data/spec/support/capturing_stderr.rb +9 -0
data/spec/syntax/versions/1.8.6_spec.rb +2 -2
data/spec/syntax/versions/2.0.0_spec.rb +2 -2
data/spec/syntax/versions/aliases_spec.rb +1 -0
metadata +26 -26
data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
data/lib/regexp_parser/syntax/tokens.rb +0 -45
data/spec/support/runner.rb +0 -42
data/spec/support/warning_extractor.rb +0 -60

data/lib/regexp_parser/scanner/properties/short.yml CHANGED Viewed

@@ -28,6 +28,7 @@ cari: carian
 cc: control
 cf: format
 cher: cherokee
+chrs: chorasmian
 ci: case_ignorable
 cn: unassigned
 co: private_use
@@ -45,12 +46,17 @@ dep: deprecated
 deva: devanagari
 di: default_ignorable_code_point
 dia: diacritic
+diak: dives_akuru
 dogr: dogra
 dsrt: deseret
 dupl: duployan
+ebase: emoji_modifier_base
+ecomp: emoji_component
 egyp: egyptian_hieroglyphs
 elba: elbasan
 elym: elymaic
+emod: emoji_modifier
+epres: emoji_presentation
 ethi: ethiopic
 ext: extender
 geor: georgian
@@ -89,6 +95,7 @@ kana: katakana
 khar: kharoshthi
 khmr: khmer
 khoj: khojki
+kits: khitan_small_script
 knda: kannada
 kthi: kaithi
 l: letter
@@ -127,7 +134,7 @@ mroo: mro
 mtei: meetei_mayek
 mult: multani
 mymr: myanmar
-n: number
+"n": number
 nand: nandinagari
 narb: old_north_arabian
 nbat: nabataean
@@ -226,6 +233,7 @@ xidc: xid_continue
 xids: xid_start
 xpeo: old_persian
 xsux: cuneiform
+yezi: yezidi
 yiii: yi
 z: separator
 zanb: zanabazar_square

data/lib/regexp_parser/syntax/any.rb CHANGED Viewed

@@ -1,15 +1,13 @@
 module Regexp::Syntax
   # A syntax that always returns true, passing all tokens as implemented. This
   # is useful during development, testing, and should be useful for some types
   # of transformations as well.
   class Any < Base
     def initialize # rubocop:disable Lint/MissingSuper
-      @implements = { :* => [:*] }
+      @implements = { :* => %i[*] }
     end
     def implements?(_type, _token) true end
     def implements!(_type, _token) true end
   end
 end

data/lib/regexp_parser/syntax/base.rb CHANGED Viewed

@@ -59,7 +59,7 @@ module Regexp::Syntax
     def normalize_group(type, token)
       case token
       when :named_ab, :named_sq
-        [:group, :named]
+        %i[group named]
       else
         [type, token]
       end
@@ -68,21 +68,21 @@ module Regexp::Syntax
     def normalize_backref(type, token)
       case token
       when :name_ref_ab, :name_ref_sq
-        [:backref, :name_ref]
+        %i[backref name_ref]
       when :name_call_ab, :name_call_sq
-        [:backref, :name_call]
+        %i[backref name_call]
       when :name_recursion_ref_ab, :name_recursion_ref_sq
-        [:backref, :name_recursion_ref]
+        %i[backref name_recursion_ref]
       when :number_ref_ab, :number_ref_sq
-        [:backref, :number_ref]
+        %i[backref number_ref]
       when :number_call_ab, :number_call_sq
-        [:backref, :number_call]
+        %i[backref number_call]
       when :number_rel_ref_ab, :number_rel_ref_sq
-        [:backref, :number_rel_ref]
+        %i[backref number_rel_ref]
       when :number_rel_call_ab, :number_rel_call_sq
-        [:backref, :number_rel_call]
+        %i[backref number_rel_call]
       when :number_recursion_ref_ab, :number_recursion_ref_sq
-        [:backref, :number_recursion_ref]
+        %i[backref number_recursion_ref]
       else
         [type, token]
       end

data/lib/regexp_parser/syntax/token/anchor.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module Regexp::Syntax
+  module Token
+    module Anchor
+      Basic       = %i[bol eol]
+      Extended    = Basic + %i[word_boundary nonword_boundary]
+      String      = %i[bos eos eos_ob_eol]
+      MatchStart  = %i[match_start]
+      All = Extended + String + MatchStart
+      Type = :anchor
+    end
+    Map[Anchor::Type] = Anchor::All
+  end
+end

data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb RENAMED Viewed

@@ -1,8 +1,8 @@
 module Regexp::Syntax
   module Token
     module Assertion
-      Lookahead = [:lookahead, :nlookahead]
-      Lookbehind = [:lookbehind, :nlookbehind]
+      Lookahead = %i[lookahead nlookahead]
+      Lookbehind = %i[lookbehind nlookbehind]
       All = Lookahead + Lookbehind
       Type = :assertion

data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} RENAMED Viewed

@@ -1,10 +1,11 @@
 module Regexp::Syntax
   module Token
     module Backreference
-      Name      = [:name_ref]
-      Number    = [:number, :number_ref, :number_rel_ref]
+      Plain     = %i[number]
+      Number    = Plain + %i[number_ref number_rel_ref]
+      Name      = %i[name_ref]
-      RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
+      RecursionLevel = %i[name_recursion_ref number_recursion_ref]
       All = Name + Number + RecursionLevel
       Type = :backref
@@ -12,8 +13,8 @@ module Regexp::Syntax
     # Type is the same as Backreference so keeping it here, for now.
     module SubexpressionCall
-      Name      = [:name_call]
-      Number    = [:number_call, :number_rel_call]
+      Name      = %i[name_call]
+      Number    = %i[number_call number_rel_call]
       All = Name + Number
     end

data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb RENAMED Viewed

@@ -1,8 +1,8 @@
 module Regexp::Syntax
   module Token
     module CharacterSet
-      Basic     = [:open, :close, :negate, :range]
-      Extended  = Basic + [:intersection]
+      Basic     = %i[open close negate range]
+      Extended  = Basic + %i[intersection]
       All = Extended
       Type = :set

data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb RENAMED Viewed

@@ -2,10 +2,10 @@ module Regexp::Syntax
   module Token
     module CharacterType
       Basic     = []
-      Extended  = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
-      Hex       = [:hex, :nonhex]
+      Extended  = %i[digit nondigit space nonspace word nonword]
+      Hex       = %i[hex nonhex]
-      Clustered = [:linebreak, :xgrapheme]
+      Clustered = %i[linebreak xgrapheme]
       All = Basic + Extended + Hex + Clustered
       Type = :type

data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb RENAMED Viewed

@@ -1,10 +1,10 @@
 module Regexp::Syntax
   module Token
     module Conditional
-      Delimiters = [:open, :close]
+      Delimiters = %i[open close]
-      Condition  = [:condition_open, :condition, :condition_close]
-      Separator  = [:separator]
+      Condition  = %i[condition_open condition condition_close]
+      Separator  = %i[separator]
       All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator

data/lib/regexp_parser/syntax/token/escape.rb ADDED Viewed

@@ -0,0 +1,31 @@
+module Regexp::Syntax
+  module Token
+    # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
+    module Escape
+      Basic = %i[backslash literal]
+      Control = %i[control meta_sequence]
+      ASCII = %i[bell backspace escape form_feed newline carriage
+                 tab vertical_tab]
+      Unicode = %i[codepoint codepoint_list]
+      Meta  = %i[dot alternation
+                 zero_or_one zero_or_more one_or_more
+                 bol eol
+                 group_open group_close
+                 interval_open interval_close
+                 set_open set_close]
+      Hex   = %i[hex]
+      Octal = %i[octal]
+      All   = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
+      Type  = :escape
+    end
+    Map[Escape::Type] = Escape::All
+  end
+end

data/lib/regexp_parser/syntax/{tokens → token}/group.rb RENAMED Viewed

@@ -1,18 +1,18 @@
 module Regexp::Syntax
   module Token
     module Group
-      Basic     = [:capture, :close]
-      Extended  = Basic + [:options, :options_switch]
+      Basic     = %i[capture close]
+      Extended  = Basic + %i[options options_switch]
-      Named     = [:named]
-      Atomic    = [:atomic]
-      Passive   = [:passive]
-      Comment   = [:comment]
+      Named     = %i[named]
+      Atomic    = %i[atomic]
+      Passive   = %i[passive]
+      Comment   = %i[comment]
       V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
                Group::Passive + Group::Comment
-      V2_4_1 = [:absence]
+      V2_4_1 = %i[absence]
       All = V1_8_6 + V2_4_1
       Type = :group

data/lib/regexp_parser/syntax/{tokens → token}/keep.rb RENAMED Viewed

@@ -1,7 +1,7 @@
 module Regexp::Syntax
   module Token
     module Keep
-      Mark = [:mark]
+      Mark = %i[mark]
       All  = Mark
       Type = :keep

data/lib/regexp_parser/syntax/{tokens → token}/meta.rb RENAMED Viewed

@@ -1,8 +1,8 @@
 module Regexp::Syntax
   module Token
     module Meta
-      Basic    = [:dot]
-      Extended = Basic + [:alternation]
+      Basic    = %i[dot]
+      Extended = Basic + %i[alternation]
       All = Extended
       Type = :meta

data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb RENAMED Viewed

@@ -1,10 +1,10 @@
 module Regexp::Syntax
   module Token
     module PosixClass
-      Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
-                  :lower, :print, :punct, :space, :upper, :xdigit]
+      Standard = %i[alnum alpha blank cntrl digit graph
+                    lower print punct space upper xdigit]
-      Extensions = [:ascii, :word]
+      Extensions = %i[ascii word]
       All = Standard + Extensions
       Type = :posixclass

data/lib/regexp_parser/syntax/token/quantifier.rb ADDED Viewed

@@ -0,0 +1,35 @@
+module Regexp::Syntax
+  module Token
+    module Quantifier
+      Greedy    = %i[
+        zero_or_one
+        zero_or_more
+        one_or_more
+      ]
+      Reluctant = %i[
+        zero_or_one_reluctant
+        zero_or_more_reluctant
+        one_or_more_reluctant
+      ]
+      Possessive  = %i[
+        zero_or_one_possessive
+        zero_or_more_possessive
+        one_or_more_possessive
+      ]
+      Interval             = %i[interval]
+      IntervalReluctant    = %i[interval_reluctant]
+      IntervalPossessive   = %i[interval_possessive]
+      IntervalAll = Interval + IntervalReluctant +
+                    IntervalPossessive
+      All = Greedy + Reluctant + Possessive + IntervalAll
+      Type = :quantifier
+    end
+    Map[Quantifier::Type] = Quantifier::All
+  end
+end