regexp_parser 1.7.1 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +157 -1
  3. data/Gemfile +6 -1
  4. data/LICENSE +1 -1
  5. data/README.md +38 -32
  6. data/Rakefile +18 -27
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +123 -0
  9. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  10. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  14. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  15. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  16. data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
  17. data/lib/regexp_parser/expression/classes/group.rb +28 -3
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -17
  21. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  22. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  23. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  24. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  25. data/lib/regexp_parser/expression/quantifier.rb +11 -2
  26. data/lib/regexp_parser/expression/sequence.rb +3 -20
  27. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  28. data/lib/regexp_parser/expression.rb +7 -139
  29. data/lib/regexp_parser/lexer.rb +13 -11
  30. data/lib/regexp_parser/parser.rb +325 -344
  31. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  32. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  33. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  34. data/lib/regexp_parser/scanner/property.rl +2 -2
  35. data/lib/regexp_parser/scanner/scanner.rl +235 -255
  36. data/lib/regexp_parser/scanner.rb +1324 -1387
  37. data/lib/regexp_parser/syntax/any.rb +4 -6
  38. data/lib/regexp_parser/syntax/base.rb +13 -15
  39. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  41. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  42. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  44. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  45. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  46. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  47. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  48. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  49. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  50. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  51. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  52. data/lib/regexp_parser/syntax/token.rb +45 -0
  53. data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
  54. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  55. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  56. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  57. data/lib/regexp_parser/syntax.rb +8 -6
  58. data/lib/regexp_parser/token.rb +9 -20
  59. data/lib/regexp_parser/version.rb +1 -1
  60. data/lib/regexp_parser.rb +0 -2
  61. data/regexp_parser.gemspec +20 -22
  62. metadata +34 -165
  63. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  64. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  65. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  66. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  67. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  68. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  69. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  70. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  71. data/spec/expression/base_spec.rb +0 -94
  72. data/spec/expression/clone_spec.rb +0 -120
  73. data/spec/expression/conditional_spec.rb +0 -89
  74. data/spec/expression/free_space_spec.rb +0 -27
  75. data/spec/expression/methods/match_length_spec.rb +0 -161
  76. data/spec/expression/methods/match_spec.rb +0 -25
  77. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  78. data/spec/expression/methods/tests_spec.rb +0 -99
  79. data/spec/expression/methods/traverse_spec.rb +0 -161
  80. data/spec/expression/options_spec.rb +0 -128
  81. data/spec/expression/root_spec.rb +0 -9
  82. data/spec/expression/sequence_spec.rb +0 -9
  83. data/spec/expression/subexpression_spec.rb +0 -50
  84. data/spec/expression/to_h_spec.rb +0 -26
  85. data/spec/expression/to_s_spec.rb +0 -100
  86. data/spec/lexer/all_spec.rb +0 -22
  87. data/spec/lexer/conditionals_spec.rb +0 -53
  88. data/spec/lexer/delimiters_spec.rb +0 -68
  89. data/spec/lexer/escapes_spec.rb +0 -14
  90. data/spec/lexer/keep_spec.rb +0 -10
  91. data/spec/lexer/literals_spec.rb +0 -89
  92. data/spec/lexer/nesting_spec.rb +0 -99
  93. data/spec/lexer/refcalls_spec.rb +0 -55
  94. data/spec/parser/all_spec.rb +0 -43
  95. data/spec/parser/alternation_spec.rb +0 -88
  96. data/spec/parser/anchors_spec.rb +0 -17
  97. data/spec/parser/conditionals_spec.rb +0 -179
  98. data/spec/parser/errors_spec.rb +0 -30
  99. data/spec/parser/escapes_spec.rb +0 -121
  100. data/spec/parser/free_space_spec.rb +0 -130
  101. data/spec/parser/groups_spec.rb +0 -108
  102. data/spec/parser/keep_spec.rb +0 -6
  103. data/spec/parser/posix_classes_spec.rb +0 -8
  104. data/spec/parser/properties_spec.rb +0 -115
  105. data/spec/parser/quantifiers_spec.rb +0 -52
  106. data/spec/parser/refcalls_spec.rb +0 -112
  107. data/spec/parser/set/intersections_spec.rb +0 -127
  108. data/spec/parser/set/ranges_spec.rb +0 -111
  109. data/spec/parser/sets_spec.rb +0 -178
  110. data/spec/parser/types_spec.rb +0 -18
  111. data/spec/scanner/all_spec.rb +0 -18
  112. data/spec/scanner/anchors_spec.rb +0 -21
  113. data/spec/scanner/conditionals_spec.rb +0 -128
  114. data/spec/scanner/delimiters_spec.rb +0 -52
  115. data/spec/scanner/errors_spec.rb +0 -67
  116. data/spec/scanner/escapes_spec.rb +0 -53
  117. data/spec/scanner/free_space_spec.rb +0 -133
  118. data/spec/scanner/groups_spec.rb +0 -52
  119. data/spec/scanner/keep_spec.rb +0 -10
  120. data/spec/scanner/literals_spec.rb +0 -49
  121. data/spec/scanner/meta_spec.rb +0 -18
  122. data/spec/scanner/properties_spec.rb +0 -64
  123. data/spec/scanner/quantifiers_spec.rb +0 -20
  124. data/spec/scanner/refcalls_spec.rb +0 -36
  125. data/spec/scanner/sets_spec.rb +0 -102
  126. data/spec/scanner/types_spec.rb +0 -14
  127. data/spec/spec_helper.rb +0 -15
  128. data/spec/support/runner.rb +0 -42
  129. data/spec/support/shared_examples.rb +0 -77
  130. data/spec/support/warning_extractor.rb +0 -60
  131. data/spec/syntax/syntax_spec.rb +0 -48
  132. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  133. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  134. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  135. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  136. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  137. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  138. data/spec/syntax/versions/aliases_spec.rb +0 -37
  139. data/spec/token/token_spec.rb +0 -85
@@ -1,15 +1,13 @@
1
1
  module Regexp::Syntax
2
-
3
2
  # A syntax that always returns true, passing all tokens as implemented. This
4
3
  # is useful during development, testing, and should be useful for some types
5
4
  # of transformations as well.
6
5
  class Any < Base
7
- def initialize
8
- @implements = { :* => [:*] }
6
+ def initialize # rubocop:disable Lint/MissingSuper
7
+ @implements = { :* => %i[*] }
9
8
  end
10
9
 
11
- def implements?(type, token) true end
12
- def implements!(type, token) true end
10
+ def implements?(_type, _token) true end
11
+ def implements!(_type, _token) true end
13
12
  end
14
-
15
13
  end
@@ -1,7 +1,5 @@
1
- require 'set'
2
-
3
1
  module Regexp::Syntax
4
- class NotImplementedError < SyntaxError
2
+ class NotImplementedError < Regexp::Syntax::SyntaxError
5
3
  def initialize(syntax, type, token)
6
4
  super "#{syntax.class.name} does not implement: [#{type}:#{token}]"
7
5
  end
@@ -23,15 +21,15 @@ module Regexp::Syntax
23
21
  end
24
22
 
25
23
  def implementations(type)
26
- @implements[type] ||= Set.new
24
+ @implements[type] ||= []
27
25
  end
28
26
 
29
27
  def implements(type, tokens)
30
- implementations(type).merge(Array(tokens))
28
+ implementations(type).concat(Array(tokens))
31
29
  end
32
30
 
33
31
  def excludes(type, tokens)
34
- implementations(type).subtract(Array(tokens))
32
+ Array(tokens).each { |tok| implementations(type).delete(tok) }
35
33
  end
36
34
 
37
35
  def implements?(type, token)
@@ -59,7 +57,7 @@ module Regexp::Syntax
59
57
  def normalize_group(type, token)
60
58
  case token
61
59
  when :named_ab, :named_sq
62
- [:group, :named]
60
+ %i[group named]
63
61
  else
64
62
  [type, token]
65
63
  end
@@ -68,21 +66,21 @@ module Regexp::Syntax
68
66
  def normalize_backref(type, token)
69
67
  case token
70
68
  when :name_ref_ab, :name_ref_sq
71
- [:backref, :name_ref]
69
+ %i[backref name_ref]
72
70
  when :name_call_ab, :name_call_sq
73
- [:backref, :name_call]
71
+ %i[backref name_call]
74
72
  when :name_recursion_ref_ab, :name_recursion_ref_sq
75
- [:backref, :name_recursion_ref]
73
+ %i[backref name_recursion_ref]
76
74
  when :number_ref_ab, :number_ref_sq
77
- [:backref, :number_ref]
75
+ %i[backref number_ref]
78
76
  when :number_call_ab, :number_call_sq
79
- [:backref, :number_call]
77
+ %i[backref number_call]
80
78
  when :number_rel_ref_ab, :number_rel_ref_sq
81
- [:backref, :number_rel_ref]
79
+ %i[backref number_rel_ref]
82
80
  when :number_rel_call_ab, :number_rel_call_sq
83
- [:backref, :number_rel_call]
81
+ %i[backref number_rel_call]
84
82
  when :number_recursion_ref_ab, :number_recursion_ref_sq
85
- [:backref, :number_recursion_ref]
83
+ %i[backref number_recursion_ref]
86
84
  else
87
85
  [type, token]
88
86
  end
@@ -0,0 +1,15 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Anchor
4
+ Basic = %i[bol eol]
5
+ Extended = Basic + %i[word_boundary nonword_boundary]
6
+ String = %i[bos eos eos_ob_eol]
7
+ MatchStart = %i[match_start]
8
+
9
+ All = Extended + String + MatchStart
10
+ Type = :anchor
11
+ end
12
+
13
+ Map[Anchor::Type] = Anchor::All
14
+ end
15
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Assertion
4
- Lookahead = [:lookahead, :nlookahead]
5
- Lookbehind = [:lookbehind, :nlookbehind]
4
+ Lookahead = %i[lookahead nlookahead]
5
+ Lookbehind = %i[lookbehind nlookbehind]
6
6
 
7
7
  All = Lookahead + Lookbehind
8
8
  Type = :assertion
@@ -0,0 +1,30 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Backreference
4
+ Plain = %i[number]
5
+ NumberRef = %i[number_ref number_rel_ref]
6
+ Number = Plain + NumberRef
7
+ Name = %i[name_ref]
8
+
9
+ RecursionLevel = %i[name_recursion_ref number_recursion_ref]
10
+
11
+ V1_8_6 = Plain
12
+
13
+ V1_9_1 = Name + NumberRef + RecursionLevel
14
+
15
+ All = V1_8_6 + V1_9_1
16
+ Type = :backref
17
+ end
18
+
19
+ # Type is the same as Backreference so keeping it here, for now.
20
+ module SubexpressionCall
21
+ Name = %i[name_call]
22
+ Number = %i[number_call number_rel_call]
23
+
24
+ All = Name + Number
25
+ end
26
+
27
+ Map[Backreference::Type] = Backreference::All +
28
+ SubexpressionCall::All
29
+ end
30
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
4
+ Basic = %i[open close negate range]
5
+ Extended = Basic + %i[intersection]
6
6
 
7
7
  All = Extended
8
8
  Type = :set
@@ -2,10 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterType
4
4
  Basic = []
5
- Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
6
- Hex = [:hex, :nonhex]
5
+ Extended = %i[digit nondigit space nonspace word nonword]
6
+ Hex = %i[hex nonhex]
7
7
 
8
- Clustered = [:linebreak, :xgrapheme]
8
+ Clustered = %i[linebreak xgrapheme]
9
9
 
10
10
  All = Basic + Extended + Hex + Clustered
11
11
  Type = :type
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Conditional
4
- Delimiters = [:open, :close]
4
+ Delimiters = %i[open close]
5
5
 
6
- Condition = [:condition_open, :condition, :condition_close]
7
- Separator = [:separator]
6
+ Condition = %i[condition_open condition condition_close]
7
+ Separator = %i[separator]
8
8
 
9
9
  All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
10
10
 
@@ -0,0 +1,31 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ # TODO: unify naming with RE::EscapeSequence, on way or the other, in v3.0.0
4
+ module Escape
5
+ Basic = %i[backslash literal]
6
+
7
+ Control = %i[control meta_sequence]
8
+
9
+ ASCII = %i[bell backspace escape form_feed newline carriage
10
+ tab vertical_tab]
11
+
12
+ Unicode = %i[codepoint codepoint_list]
13
+
14
+ Meta = %i[dot alternation
15
+ zero_or_one zero_or_more one_or_more
16
+ bol eol
17
+ group_open group_close
18
+ interval_open interval_close
19
+ set_open set_close]
20
+
21
+ Hex = %i[hex]
22
+
23
+ Octal = %i[octal]
24
+
25
+ All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
26
+ Type = :escape
27
+ end
28
+
29
+ Map[Escape::Type] = Escape::All
30
+ end
31
+ end
@@ -1,18 +1,18 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Group
4
- Basic = [:capture, :close]
5
- Extended = Basic + [:options, :options_switch]
4
+ Basic = %i[capture close]
5
+ Extended = Basic + %i[options options_switch]
6
6
 
7
- Named = [:named]
8
- Atomic = [:atomic]
9
- Passive = [:passive]
10
- Comment = [:comment]
7
+ Named = %i[named]
8
+ Atomic = %i[atomic]
9
+ Passive = %i[passive]
10
+ Comment = %i[comment]
11
11
 
12
12
  V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
13
13
  Group::Passive + Group::Comment
14
14
 
15
- V2_4_1 = [:absence]
15
+ V2_4_1 = %i[absence]
16
16
 
17
17
  All = V1_8_6 + V2_4_1
18
18
  Type = :group
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Keep
4
- Mark = [:mark]
4
+ Mark = %i[mark]
5
5
 
6
6
  All = Mark
7
7
  Type = :keep
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
4
+ Basic = %i[dot]
5
+ Extended = Basic + %i[alternation]
6
6
 
7
7
  All = Extended
8
8
  Type = :meta
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module PosixClass
4
- Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
5
- :lower, :print, :punct, :space, :upper, :xdigit]
4
+ Standard = %i[alnum alpha blank cntrl digit graph
5
+ lower print punct space upper xdigit]
6
6
 
7
- Extensions = [:ascii, :word]
7
+ Extensions = %i[ascii word]
8
8
 
9
9
  All = Standard + Extensions
10
10
  Type = :posixclass
@@ -0,0 +1,35 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Quantifier
4
+ Greedy = %i[
5
+ zero_or_one
6
+ zero_or_more
7
+ one_or_more
8
+ ]
9
+
10
+ Reluctant = %i[
11
+ zero_or_one_reluctant
12
+ zero_or_more_reluctant
13
+ one_or_more_reluctant
14
+ ]
15
+
16
+ Possessive = %i[
17
+ zero_or_one_possessive
18
+ zero_or_more_possessive
19
+ one_or_more_possessive
20
+ ]
21
+
22
+ Interval = %i[interval]
23
+ IntervalReluctant = %i[interval_reluctant]
24
+ IntervalPossessive = %i[interval_possessive]
25
+
26
+ IntervalAll = Interval + IntervalReluctant +
27
+ IntervalPossessive
28
+
29
+ All = Greedy + Reluctant + Possessive + IntervalAll
30
+ Type = :quantifier
31
+ end
32
+
33
+ Map[Quantifier::Type] = Quantifier::All
34
+ end
35
+ end