regexp_parser 2.1.1 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (129)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +19 -1
  3. data/LICENSE +1 -1
  4. data/README.md +17 -23
  5. data/Rakefile +10 -19
  6. data/lib/regexp_parser/expression/base.rb +123 -0
  7. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  8. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  9. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  12. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  13. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  14. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  15. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  16. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  17. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  18. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  19. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  20. data/lib/regexp_parser/expression/sequence.rb +0 -1
  21. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  22. data/lib/regexp_parser/expression.rb +6 -130
  23. data/lib/regexp_parser/lexer.rb +7 -5
  24. data/lib/regexp_parser/scanner/properties/long.csv +604 -0
  25. data/lib/regexp_parser/scanner/properties/short.csv +242 -0
  26. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  27. data/lib/regexp_parser/scanner.rb +126 -124
  28. data/lib/regexp_parser/syntax/any.rb +1 -3
  29. data/lib/regexp_parser/syntax/base.rb +12 -14
  30. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  31. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  32. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  33. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  35. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  36. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  37. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  38. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  39. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  40. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  41. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  43. data/lib/regexp_parser/syntax/token.rb +45 -0
  44. data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
  45. data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
  46. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  47. data/lib/regexp_parser/syntax.rb +1 -1
  48. data/lib/regexp_parser/token.rb +9 -20
  49. data/lib/regexp_parser/version.rb +1 -1
  50. data/lib/regexp_parser.rb +0 -2
  51. data/regexp_parser.gemspec +20 -22
  52. metadata +32 -164
  53. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  54. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  55. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  56. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  57. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  58. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  59. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  60. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  61. data/spec/expression/base_spec.rb +0 -104
  62. data/spec/expression/clone_spec.rb +0 -152
  63. data/spec/expression/conditional_spec.rb +0 -89
  64. data/spec/expression/free_space_spec.rb +0 -27
  65. data/spec/expression/methods/match_length_spec.rb +0 -161
  66. data/spec/expression/methods/match_spec.rb +0 -25
  67. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  68. data/spec/expression/methods/tests_spec.rb +0 -99
  69. data/spec/expression/methods/traverse_spec.rb +0 -161
  70. data/spec/expression/options_spec.rb +0 -128
  71. data/spec/expression/subexpression_spec.rb +0 -50
  72. data/spec/expression/to_h_spec.rb +0 -26
  73. data/spec/expression/to_s_spec.rb +0 -108
  74. data/spec/lexer/all_spec.rb +0 -22
  75. data/spec/lexer/conditionals_spec.rb +0 -53
  76. data/spec/lexer/delimiters_spec.rb +0 -68
  77. data/spec/lexer/escapes_spec.rb +0 -14
  78. data/spec/lexer/keep_spec.rb +0 -10
  79. data/spec/lexer/literals_spec.rb +0 -64
  80. data/spec/lexer/nesting_spec.rb +0 -99
  81. data/spec/lexer/refcalls_spec.rb +0 -60
  82. data/spec/parser/all_spec.rb +0 -43
  83. data/spec/parser/alternation_spec.rb +0 -88
  84. data/spec/parser/anchors_spec.rb +0 -17
  85. data/spec/parser/conditionals_spec.rb +0 -179
  86. data/spec/parser/errors_spec.rb +0 -30
  87. data/spec/parser/escapes_spec.rb +0 -121
  88. data/spec/parser/free_space_spec.rb +0 -130
  89. data/spec/parser/groups_spec.rb +0 -108
  90. data/spec/parser/keep_spec.rb +0 -6
  91. data/spec/parser/options_spec.rb +0 -28
  92. data/spec/parser/posix_classes_spec.rb +0 -8
  93. data/spec/parser/properties_spec.rb +0 -115
  94. data/spec/parser/quantifiers_spec.rb +0 -68
  95. data/spec/parser/refcalls_spec.rb +0 -117
  96. data/spec/parser/set/intersections_spec.rb +0 -127
  97. data/spec/parser/set/ranges_spec.rb +0 -111
  98. data/spec/parser/sets_spec.rb +0 -178
  99. data/spec/parser/types_spec.rb +0 -18
  100. data/spec/scanner/all_spec.rb +0 -18
  101. data/spec/scanner/anchors_spec.rb +0 -21
  102. data/spec/scanner/conditionals_spec.rb +0 -128
  103. data/spec/scanner/delimiters_spec.rb +0 -52
  104. data/spec/scanner/errors_spec.rb +0 -67
  105. data/spec/scanner/escapes_spec.rb +0 -64
  106. data/spec/scanner/free_space_spec.rb +0 -165
  107. data/spec/scanner/groups_spec.rb +0 -61
  108. data/spec/scanner/keep_spec.rb +0 -10
  109. data/spec/scanner/literals_spec.rb +0 -39
  110. data/spec/scanner/meta_spec.rb +0 -18
  111. data/spec/scanner/options_spec.rb +0 -36
  112. data/spec/scanner/properties_spec.rb +0 -64
  113. data/spec/scanner/quantifiers_spec.rb +0 -25
  114. data/spec/scanner/refcalls_spec.rb +0 -55
  115. data/spec/scanner/sets_spec.rb +0 -151
  116. data/spec/scanner/types_spec.rb +0 -14
  117. data/spec/spec_helper.rb +0 -16
  118. data/spec/support/runner.rb +0 -42
  119. data/spec/support/shared_examples.rb +0 -77
  120. data/spec/support/warning_extractor.rb +0 -60
  121. data/spec/syntax/syntax_spec.rb +0 -48
  122. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  123. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  124. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  125. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  126. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  127. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  128. data/spec/syntax/versions/aliases_spec.rb +0 -37
  129. data/spec/token/token_spec.rb +0 -85
@@ -1,15 +1,13 @@
1
1
  module Regexp::Syntax
2
-
3
2
  # A syntax that always returns true, passing all tokens as implemented. This
4
3
  # is useful during development, testing, and should be useful for some types
5
4
  # of transformations as well.
6
5
  class Any < Base
7
6
  def initialize # rubocop:disable Lint/MissingSuper
8
- @implements = { :* => [:*] }
7
+ @implements = { :* => %i[*] }
9
8
  end
10
9
 
11
10
  def implements?(_type, _token) true end
12
11
  def implements!(_type, _token) true end
13
12
  end
14
-
15
13
  end
@@ -1,5 +1,3 @@
1
- require 'set'
2
-
3
1
  module Regexp::Syntax
4
2
  class NotImplementedError < Regexp::Syntax::SyntaxError
5
3
  def initialize(syntax, type, token)
@@ -23,15 +21,15 @@ module Regexp::Syntax
23
21
  end
24
22
 
25
23
  def implementations(type)
26
- @implements[type] ||= Set.new
24
+ @implements[type] ||= []
27
25
  end
28
26
 
29
27
  def implements(type, tokens)
30
- implementations(type).merge(Array(tokens))
28
+ implementations(type).concat(Array(tokens))
31
29
  end
32
30
 
33
31
  def excludes(type, tokens)
34
- implementations(type).subtract(Array(tokens))
32
+ Array(tokens).each { |tok| implementations(type).delete(tok) }
35
33
  end
36
34
 
37
35
  def implements?(type, token)
@@ -59,7 +57,7 @@ module Regexp::Syntax
59
57
  def normalize_group(type, token)
60
58
  case token
61
59
  when :named_ab, :named_sq
62
- [:group, :named]
60
+ %i[group named]
63
61
  else
64
62
  [type, token]
65
63
  end
@@ -68,21 +66,21 @@ module Regexp::Syntax
68
66
  def normalize_backref(type, token)
69
67
  case token
70
68
  when :name_ref_ab, :name_ref_sq
71
- [:backref, :name_ref]
69
+ %i[backref name_ref]
72
70
  when :name_call_ab, :name_call_sq
73
- [:backref, :name_call]
71
+ %i[backref name_call]
74
72
  when :name_recursion_ref_ab, :name_recursion_ref_sq
75
- [:backref, :name_recursion_ref]
73
+ %i[backref name_recursion_ref]
76
74
  when :number_ref_ab, :number_ref_sq
77
- [:backref, :number_ref]
75
+ %i[backref number_ref]
78
76
  when :number_call_ab, :number_call_sq
79
- [:backref, :number_call]
77
+ %i[backref number_call]
80
78
  when :number_rel_ref_ab, :number_rel_ref_sq
81
- [:backref, :number_rel_ref]
79
+ %i[backref number_rel_ref]
82
80
  when :number_rel_call_ab, :number_rel_call_sq
83
- [:backref, :number_rel_call]
81
+ %i[backref number_rel_call]
84
82
  when :number_recursion_ref_ab, :number_recursion_ref_sq
85
- [:backref, :number_recursion_ref]
83
+ %i[backref number_recursion_ref]
86
84
  else
87
85
  [type, token]
88
86
  end
@@ -0,0 +1,15 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Anchor
4
+ Basic = %i[bol eol]
5
+ Extended = Basic + %i[word_boundary nonword_boundary]
6
+ String = %i[bos eos eos_ob_eol]
7
+ MatchStart = %i[match_start]
8
+
9
+ All = Extended + String + MatchStart
10
+ Type = :anchor
11
+ end
12
+
13
+ Map[Anchor::Type] = Anchor::All
14
+ end
15
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Assertion
4
- Lookahead = [:lookahead, :nlookahead]
5
- Lookbehind = [:lookbehind, :nlookbehind]
4
+ Lookahead = %i[lookahead nlookahead]
5
+ Lookbehind = %i[lookbehind nlookbehind]
6
6
 
7
7
  All = Lookahead + Lookbehind
8
8
  Type = :assertion
@@ -0,0 +1,30 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Backreference
4
+ Plain = %i[number]
5
+ NumberRef = %i[number_ref number_rel_ref]
6
+ Number = Plain + NumberRef
7
+ Name = %i[name_ref]
8
+
9
+ RecursionLevel = %i[name_recursion_ref number_recursion_ref]
10
+
11
+ V1_8_6 = Plain
12
+
13
+ V1_9_1 = Name + NumberRef + RecursionLevel
14
+
15
+ All = V1_8_6 + V1_9_1
16
+ Type = :backref
17
+ end
18
+
19
+ # Type is the same as Backreference so keeping it here, for now.
20
+ module SubexpressionCall
21
+ Name = %i[name_call]
22
+ Number = %i[number_call number_rel_call]
23
+
24
+ All = Name + Number
25
+ end
26
+
27
+ Map[Backreference::Type] = Backreference::All +
28
+ SubexpressionCall::All
29
+ end
30
+ end
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterSet
4
- Basic = [:open, :close, :negate, :range]
5
- Extended = Basic + [:intersection]
4
+ Basic = %i[open close negate range]
5
+ Extended = Basic + %i[intersection]
6
6
 
7
7
  All = Extended
8
8
  Type = :set
@@ -2,10 +2,10 @@ module Regexp::Syntax
2
2
  module Token
3
3
  module CharacterType
4
4
  Basic = []
5
- Extended = [:digit, :nondigit, :space, :nonspace, :word, :nonword]
6
- Hex = [:hex, :nonhex]
5
+ Extended = %i[digit nondigit space nonspace word nonword]
6
+ Hex = %i[hex nonhex]
7
7
 
8
- Clustered = [:linebreak, :xgrapheme]
8
+ Clustered = %i[linebreak xgrapheme]
9
9
 
10
10
  All = Basic + Extended + Hex + Clustered
11
11
  Type = :type
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Conditional
4
- Delimiters = [:open, :close]
4
+ Delimiters = %i[open close]
5
5
 
6
- Condition = [:condition_open, :condition, :condition_close]
7
- Separator = [:separator]
6
+ Condition = %i[condition_open condition condition_close]
7
+ Separator = %i[separator]
8
8
 
9
9
  All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
10
10
 
@@ -0,0 +1,31 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ # TODO: unify naming with RE::EscapeSequence, one way or the other, in v3.0.0
4
+ module Escape
5
+ Basic = %i[backslash literal]
6
+
7
+ Control = %i[control meta_sequence]
8
+
9
+ ASCII = %i[bell backspace escape form_feed newline carriage
10
+ tab vertical_tab]
11
+
12
+ Unicode = %i[codepoint codepoint_list]
13
+
14
+ Meta = %i[dot alternation
15
+ zero_or_one zero_or_more one_or_more
16
+ bol eol
17
+ group_open group_close
18
+ interval_open interval_close
19
+ set_open set_close]
20
+
21
+ Hex = %i[hex]
22
+
23
+ Octal = %i[octal]
24
+
25
+ All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
26
+ Type = :escape
27
+ end
28
+
29
+ Map[Escape::Type] = Escape::All
30
+ end
31
+ end
@@ -1,18 +1,18 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Group
4
- Basic = [:capture, :close]
5
- Extended = Basic + [:options, :options_switch]
4
+ Basic = %i[capture close]
5
+ Extended = Basic + %i[options options_switch]
6
6
 
7
- Named = [:named]
8
- Atomic = [:atomic]
9
- Passive = [:passive]
10
- Comment = [:comment]
7
+ Named = %i[named]
8
+ Atomic = %i[atomic]
9
+ Passive = %i[passive]
10
+ Comment = %i[comment]
11
11
 
12
12
  V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
13
13
  Group::Passive + Group::Comment
14
14
 
15
- V2_4_1 = [:absence]
15
+ V2_4_1 = %i[absence]
16
16
 
17
17
  All = V1_8_6 + V2_4_1
18
18
  Type = :group
@@ -1,7 +1,7 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Keep
4
- Mark = [:mark]
4
+ Mark = %i[mark]
5
5
 
6
6
  All = Mark
7
7
  Type = :keep
@@ -1,8 +1,8 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module Meta
4
- Basic = [:dot]
5
- Extended = Basic + [:alternation]
4
+ Basic = %i[dot]
5
+ Extended = Basic + %i[alternation]
6
6
 
7
7
  All = Extended
8
8
  Type = :meta
@@ -1,10 +1,10 @@
1
1
  module Regexp::Syntax
2
2
  module Token
3
3
  module PosixClass
4
- Standard = [:alnum, :alpha, :blank, :cntrl, :digit, :graph,
5
- :lower, :print, :punct, :space, :upper, :xdigit]
4
+ Standard = %i[alnum alpha blank cntrl digit graph
5
+ lower print punct space upper xdigit]
6
6
 
7
- Extensions = [:ascii, :word]
7
+ Extensions = %i[ascii word]
8
8
 
9
9
  All = Standard + Extensions
10
10
  Type = :posixclass
@@ -0,0 +1,35 @@
1
+ module Regexp::Syntax
2
+ module Token
3
+ module Quantifier
4
+ Greedy = %i[
5
+ zero_or_one
6
+ zero_or_more
7
+ one_or_more
8
+ ]
9
+
10
+ Reluctant = %i[
11
+ zero_or_one_reluctant
12
+ zero_or_more_reluctant
13
+ one_or_more_reluctant
14
+ ]
15
+
16
+ Possessive = %i[
17
+ zero_or_one_possessive
18
+ zero_or_more_possessive
19
+ one_or_more_possessive
20
+ ]
21
+
22
+ Interval = %i[interval]
23
+ IntervalReluctant = %i[interval_reluctant]
24
+ IntervalPossessive = %i[interval_possessive]
25
+
26
+ IntervalAll = Interval + IntervalReluctant +
27
+ IntervalPossessive
28
+
29
+ All = Greedy + Reluctant + Possessive + IntervalAll
30
+ Type = :quantifier
31
+ end
32
+
33
+ Map[Quantifier::Type] = Quantifier::All
34
+ end
35
+ end