regexp_parser 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/Gemfile +5 -1
  4. data/README.md +15 -21
  5. data/Rakefile +11 -17
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  15. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  16. data/lib/regexp_parser/expression/classes/group.rb +6 -1
  17. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  18. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  19. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  20. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  21. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  22. data/lib/regexp_parser/expression/sequence.rb +3 -10
  23. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  24. data/lib/regexp_parser/expression.rb +7 -130
  25. data/lib/regexp_parser/lexer.rb +7 -5
  26. data/lib/regexp_parser/parser.rb +282 -334
  27. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  28. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  29. data/lib/regexp_parser/scanner/scanner.rl +64 -87
  30. data/lib/regexp_parser/scanner.rb +1024 -1073
  31. data/lib/regexp_parser/syntax/any.rb +2 -4
  32. data/lib/regexp_parser/syntax/base.rb +10 -10
  33. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  36. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  37. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  38. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  39. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  41. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  42. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  44. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  45. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  46. data/lib/regexp_parser/syntax/token.rb +45 -0
  47. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  48. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  49. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  50. data/lib/regexp_parser/syntax.rb +8 -6
  51. data/lib/regexp_parser/token.rb +9 -20
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +0 -2
  54. data/spec/expression/clone_spec.rb +36 -4
  55. data/spec/expression/free_space_spec.rb +2 -2
  56. data/spec/expression/methods/match_length_spec.rb +2 -2
  57. data/spec/lexer/nesting_spec.rb +2 -2
  58. data/spec/lexer/refcalls_spec.rb +5 -0
  59. data/spec/parser/all_spec.rb +2 -2
  60. data/spec/parser/escapes_spec.rb +43 -31
  61. data/spec/parser/properties_spec.rb +6 -4
  62. data/spec/parser/refcalls_spec.rb +5 -0
  63. data/spec/parser/set/ranges_spec.rb +26 -16
  64. data/spec/scanner/escapes_spec.rb +29 -20
  65. data/spec/scanner/refcalls_spec.rb +19 -0
  66. data/spec/scanner/sets_spec.rb +66 -23
  67. data/spec/spec_helper.rb +13 -1
  68. data/spec/support/capturing_stderr.rb +9 -0
  69. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  70. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  71. data/spec/syntax/versions/aliases_spec.rb +1 -0
  72. metadata +27 -26
  73. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  74. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  75. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  76. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  77. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  78. data/spec/support/runner.rb +0 -42
  79. data/spec/support/warning_extractor.rb +0 -60
@@ -1,15 +1,13 @@
1
1
  module Regexp::Syntax
2
-
3
2
  # A syntax that always returns true, passing all tokens as implemented. This
4
3
  # is useful during development, testing, and should be useful for some types
5
4
  # of transformations as well.
6
5
  class Any < Base
7
- def initialize
8
- @implements = { :* => [:*] }
6
+ def initialize # rubocop:disable Lint/MissingSuper
7
+ @implements = { :* => %i[*] }
9
8
  end
10
9
 
11
10
  def implements?(_type, _token) true end
12
11
  def implements!(_type, _token) true end
13
12
  end
14
-
15
13
  end
@@ -1,7 +1,7 @@
1
1
  require 'set'
2
2
 
3
3
  module Regexp::Syntax
4
- class NotImplementedError < SyntaxError
4
+ class NotImplementedError < Regexp::Syntax::SyntaxError
5
5
  def initialize(syntax, type, token)
6
6
  super "#{syntax.class.name} does not implement: [#{type}:#{token}]"
7
7
  end
@@ -59,7 +59,7 @@ module Regexp::Syntax
59
59
  def normalize_group(type, token)
60
60
  case token
61
61
  when :named_ab, :named_sq
62
- [:group, :named]
62
+ %i[group named]
63
63
  else
64
64
  [type, token]
65
65
  end
@@ -68,21 +68,21 @@ module Regexp::Syntax
68
68
  def normalize_backref(type, token)
69
69
  case token
70
70
  when :name_ref_ab, :name_ref_sq
71
- [:backref, :name_ref]
71
+ %i[backref name_ref]
72
72
  when :name_call_ab, :name_call_sq
73
- [:backref, :name_call]
73
+ %i[backref name_call]
74
74
  when :name_recursion_ref_ab, :name_recursion_ref_sq
75
- [:backref, :name_recursion_ref]
75
+ %i[backref name_recursion_ref]
76
76
  when :number_ref_ab, :number_ref_sq
77
- [:backref, :number_ref]
77
+ %i[backref number_ref]
78
78
  when :number_call_ab, :number_call_sq
79
- [:backref, :number_call]
79
+ %i[backref number_call]
80
80
  when :number_rel_ref_ab, :number_rel_ref_sq
81
- [:backref, :number_rel_ref]
81
+ %i[backref number_rel_ref]
82
82
  when :number_rel_call_ab, :number_rel_call_sq
83
- [:backref, :number_rel_call]
83
+ %i[backref number_rel_call]
84
84
  when :number_recursion_ref_ab, :number_recursion_ref_sq
85
- [:backref, :number_recursion_ref]
85
+ %i[backref number_recursion_ref]
86
86
  else
87
87
  [type, token]
88
88
  end
@@ -0,0 +1,15 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Anchor
4
+ Basic = %i[bol eol]
5
+ Extended = Basic + %i[word_boundary nonword_boundary]
6
+ String = %i[bos eos eos_ob_eol]
7
+ MatchStart = %i[match_start]
8
+
9
+ All = Extended + String + MatchStart
10
+ Type = :anchor
11
+ end
12
+
13
+ Map[Anchor::Type] = Anchor::All
14
+ end
15
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Assertion
4
- Lookahead = [:lookahead, :nlookahead]
5
- Lookbehind = [:lookbehind, :nlookbehind]
4
+ Lookahead = %i[lookahead nlookahead]
5
+ Lookbehind = %i[lookbehind nlookbehind]
6
6
 
7
7
  All = Lookahead + Lookbehind
8
8
  Type = :assertion
@@ -1,10 +1,11 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Backreference
4
- Name = [:name_ref]
5
- Number = [:number, :number_ref, :number_rel_ref]
4
+ Plain = %i[number]
5
+ Number = Plain + %i[number_ref number_rel_ref]
6
+ Name = %i[name_ref]
6
7
 
7
- RecursionLevel = [:name_recursion_ref, :number_recursion_ref]
8
+ RecursionLevel = %i[name_recursion_ref number_recursion_ref]
8
9
 
9
10
  All = Name + Number + RecursionLevel
10
11
  Type = :backref
@@ -12,8 +13,8 @@ module Regexp::Syntax
12
13
 
13
14
  # Type is the same as Backreference so keeping it here, for now.
14
15
  module SubexpressionCall
15
- Name = [:name_call]
16
- Number = [:number_call, :number_rel_call]
16
+ Name = %i[name_call]
17
+ Number = %i[number_call number_rel_call]
17
18
 
18
19
  All = Name + Number
19
20
  end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
4
+ Basic = %i[open close negate range]
5
+ Extended = Basic + %i[intersection]
6
6
 
7
7
  All = Extended
8
8
  Type = :set
@@ -2,10 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterType
4
4
  Basic = []
5
- Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
6
- Hex = [:hex, :nonhex]
5
+ Extended = %i[digit nondigit space nonspace word nonword]
6
+ Hex = %i[hex nonhex]
7
7
 
8
- Clustered = [:linebreak, :xgrapheme]
8
+ Clustered = %i[linebreak xgrapheme]
9
9
 
10
10
  All = Basic + Extended + Hex + Clustered
11
11
  Type = :type
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Conditional
4
- Delimiters = [:open, :close]
4
+ Delimiters = %i[open close]
5
5
 
6
- Condition = [:condition_open, :condition, :condition_close]
7
- Separator = [:separator]
6
+ Condition = %i[condition_open condition condition_close]
7
+ Separator = %i[separator]
8
8
 
9
9
  All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
10
10
 
@@ -0,0 +1,31 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
+ module Escape
5
+ Basic = %i[backslash literal]
6
+
7
+ Control = %i[control meta_sequence]
8
+
9
+ ASCII = %i[bell backspace escape form_feed newline carriage
10
+ tab vertical_tab]
11
+
12
+ Unicode = %i[codepoint codepoint_list]
13
+
14
+ Meta = %i[dot alternation
15
+ zero_or_one zero_or_more one_or_more
16
+ bol eol
17
+ group_open group_close
18
+ interval_open interval_close
19
+ set_open set_close]
20
+
21
+ Hex = %i[hex]
22
+
23
+ Octal = %i[octal]
24
+
25
+ All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
26
+ Type = :escape
27
+ end
28
+
29
+ Map[Escape::Type] = Escape::All
30
+ end
31
+ end
@@ -1,18 +1,18 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Group
4
- Basic = [:capture, :close]
5
- Extended = Basic + [:options, :options_switch]
4
+ Basic = %i[capture close]
5
+ Extended = Basic + %i[options options_switch]
6
6
 
7
- Named = [:named]
8
- Atomic = [:atomic]
9
- Passive = [:passive]
10
- Comment = [:comment]
7
+ Named = %i[named]
8
+ Atomic = %i[atomic]
9
+ Passive = %i[passive]
10
+ Comment = %i[comment]
11
11
 
12
12
  V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
13
13
  Group::Passive + Group::Comment
14
14
 
15
- V2_4_1 = [:absence]
15
+ V2_4_1 = %i[absence]
16
16
 
17
17
  All = V1_8_6 + V2_4_1
18
18
  Type = :group
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Keep
4
- Mark = [:mark]
4
+ Mark = %i[mark]
5
5
 
6
6
  All = Mark
7
7
  Type = :keep
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
4
+ Basic = %i[dot]
5
+ Extended = Basic + %i[alternation]
6
6
 
7
7
  All = Extended
8
8
  Type = :meta
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module PosixClass
4
- Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
5
- :lower, :print, :punct, :space, :upper, :xdigit]
4
+ Standard = %i[alnum alpha blank cntrl digit graph
5
+ lower print punct space upper xdigit]
6
6
 
7
- Extensions = [:ascii, :word]
7
+ Extensions = %i[ascii word]
8
8
 
9
9
  All = Standard + Extensions
10
10
  Type = :posixclass
@@ -0,0 +1,35 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Quantifier
4
+ Greedy = %i[
5
+ zero_or_one
6
+ zero_or_more
7
+ one_or_more
8
+ ]
9
+
10
+ Reluctant = %i[
11
+ zero_or_one_reluctant
12
+ zero_or_more_reluctant
13
+ one_or_more_reluctant
14
+ ]
15
+
16
+ Possessive = %i[
17
+ zero_or_one_possessive
18
+ zero_or_more_possessive
19
+ one_or_more_possessive
20
+ ]
21
+
22
+ Interval = %i[interval]
23
+ IntervalReluctant = %i[interval_reluctant]
24
+ IntervalPossessive = %i[interval_possessive]
25
+
26
+ IntervalAll = Interval + IntervalReluctant +
27
+ IntervalPossessive
28
+
29
+ All = Greedy + Reluctant + Possessive + IntervalAll
30
+ Type = :quantifier
31
+ end
32
+
33
+ Map[Quantifier::Type] = Quantifier::All
34
+ end
35
+ end