regexp_parser 1.7.0 → 2.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +364 -22
  3. data/Gemfile +8 -2
  4. data/LICENSE +1 -1
  5. data/README.md +124 -88
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +76 -0
  9. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  10. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  11. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  18. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  22. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  23. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  24. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  25. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  26. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  27. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  28. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  29. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  30. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  31. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  32. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  33. data/lib/regexp_parser/expression/sequence.rb +11 -47
  34. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  35. data/lib/regexp_parser/expression/shared.rb +111 -0
  36. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  37. data/lib/regexp_parser/expression.rb +14 -141
  38. data/lib/regexp_parser/lexer.rb +83 -41
  39. data/lib/regexp_parser/parser.rb +371 -429
  40. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  41. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  42. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  43. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  44. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  45. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  46. data/lib/regexp_parser/scanner/property.rl +4 -4
  47. data/lib/regexp_parser/scanner/scanner.rl +295 -368
  48. data/lib/regexp_parser/scanner.rb +1405 -1674
  49. data/lib/regexp_parser/syntax/any.rb +2 -7
  50. data/lib/regexp_parser/syntax/base.rb +92 -67
  51. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  52. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  53. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  54. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  55. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  56. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  57. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  58. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  59. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  60. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  61. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  62. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  63. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  64. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  65. data/lib/regexp_parser/syntax/token.rb +45 -0
  66. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  67. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  68. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  69. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  70. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  71. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  75. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  79. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  81. data/lib/regexp_parser/syntax/versions.rb +3 -1
  82. data/lib/regexp_parser/syntax.rb +8 -6
  83. data/lib/regexp_parser/token.rb +9 -20
  84. data/lib/regexp_parser/version.rb +1 -1
  85. data/lib/regexp_parser.rb +0 -2
  86. data/regexp_parser.gemspec +20 -22
  87. metadata +49 -166
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,23 +1,19 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
- complete? && raise("Can't add more than 2 expressions to a Range")
9
+ complete? and raise Regexp::Parser::Error,
10
+ "Can't add more than 2 expressions to a Range"
11
11
  super
12
12
  end
13
13
 
14
14
  def complete?
15
15
  count == 2
16
16
  end
17
-
18
- def to_s(_format = :full)
19
- expressions.join(text)
20
- end
21
17
  end
22
18
  end
23
19
  end
@@ -19,9 +19,8 @@ module Regexp::Expression
19
19
  def close
20
20
  self.closed = true
21
21
  end
22
-
23
- def to_s(format = :full)
24
- "#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
25
- end
26
22
  end
23
+
24
+ # alias for symmetry between token symbol and Expression class name
25
+ Set = CharacterSet
27
26
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Conditional
3
- class TooManyBranches < StandardError
3
+ class TooManyBranches < Regexp::Parser::Error
4
4
  def initialize
5
5
  super('The conditional expression has more than 2 branches')
6
6
  end
@@ -15,6 +15,11 @@ module Regexp::Expression
15
15
  ref = text.tr("'<>()", "")
16
16
  ref =~ /\D/ ? ref : Integer(ref)
17
17
  end
18
+
19
+ def initialize_copy(orig)
20
+ self.referenced_expression = orig.referenced_expression.dup
21
+ super
22
+ end
18
23
  end
19
24
 
20
25
  class Branch < Regexp::Expression::Sequence; end
@@ -26,9 +31,9 @@ module Regexp::Expression
26
31
  expressions.last << exp
27
32
  end
28
33
 
29
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
30
35
  raise TooManyBranches.new if branches.length == 2
31
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
32
37
  Branch.add_to(self, params, active_opts)
33
38
  end
34
39
  alias :branch :add_sequence
@@ -50,8 +55,9 @@ module Regexp::Expression
50
55
  condition.reference
51
56
  end
52
57
 
53
- def to_s(format = :full)
54
- "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
58
+ def initialize_copy(orig)
59
+ self.referenced_expression = orig.referenced_expression.dup
60
+ super
55
61
  end
56
62
  end
57
63
  end
@@ -1,16 +1,21 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
3
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
4
  def codepoint
12
5
  char.ord
13
6
  end
7
+
8
+ if ''.respond_to?(:undump)
9
+ def char
10
+ %("#{text}").undump
11
+ end
12
+ else
13
+ # poor man's unescape without using eval
14
+ require 'yaml'
15
+ def char
16
+ YAML.load(%Q(---\n"#{text}"\n))
17
+ end
18
+ end
14
19
  end
15
20
 
16
21
  class Literal < EscapeSequence::Base
@@ -91,4 +96,7 @@ module Regexp::Expression
91
96
  end
92
97
  end
93
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
94
102
  end
@@ -1,17 +1,17 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
5
- raise "Can not quantify a free space object"
3
+ def quantify(*_args)
4
+ raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
9
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
10
10
 
11
11
  class WhiteSpace < Regexp::Expression::FreeSpace
12
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
13
14
  text << exp.text
14
15
  end
15
16
  end
16
-
17
17
  end
@@ -1,27 +1,45 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def to_s(format = :full)
5
- "#{text}#{expressions.join})#{quantifier_affix(format)}"
6
- end
4
+ end
7
5
 
8
- def capturing?; false end
6
+ class Passive < Group::Base
7
+ attr_writer :implicit
8
+
9
+ def initialize(*)
10
+ @implicit = false
11
+ super
12
+ end
9
13
 
10
- def comment?; false end
14
+ def implicit?
15
+ @implicit
16
+ end
11
17
  end
12
18
 
13
- class Atomic < Group::Base; end
14
- class Passive < Group::Base; end
15
19
  class Absence < Group::Base; end
20
+ class Atomic < Group::Base; end
21
+ # TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
22
+ # longer inherit from Group because it is effectively a terminal expression.
16
23
  class Options < Group::Base
17
24
  attr_accessor :option_changes
25
+
26
+ def initialize_copy(orig)
27
+ self.option_changes = orig.option_changes.dup
28
+ super
29
+ end
30
+
31
+ def quantify(*args)
32
+ if token == :options_switch
33
+ raise Regexp::Parser::Error, 'Can not quantify an option switch'
34
+ else
35
+ super
36
+ end
37
+ end
18
38
  end
19
39
 
20
40
  class Capture < Group::Base
21
41
  attr_accessor :number, :number_at_level
22
42
  alias identifier number
23
-
24
- def capturing?; true end
25
43
  end
26
44
 
27
45
  class Named < Group::Capture
@@ -33,18 +51,13 @@ module Regexp::Expression
33
51
  super
34
52
  end
35
53
 
36
- def initialize_clone(orig)
54
+ def initialize_copy(orig)
37
55
  @name = orig.name.dup
38
56
  super
39
57
  end
40
58
  end
41
59
 
42
60
  class Comment < Group::Base
43
- def to_s(_format = :full)
44
- text.dup
45
- end
46
-
47
- def comment?; true end
48
61
  end
49
62
  end
50
63
 
@@ -1,5 +1,7 @@
1
1
  module Regexp::Expression
2
2
  module Keep
3
+ # TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
4
+ # that contains all expressions to its left.
3
5
  class Mark < Regexp::Expression::Base; end
4
6
  end
5
7
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -5,7 +5,11 @@ module Regexp::Expression
5
5
  end
6
6
 
7
7
  def name
8
- token.to_s
8
+ text[/\w+/]
9
9
  end
10
10
  end
11
+
12
+ # alias for symmetry between token symbol and Expression class name
13
+ Posixclass = PosixClass
14
+ Nonposixclass = PosixClass
11
15
  end
@@ -1,24 +1,9 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
5
- def initialize(*args)
6
- unless args.first.is_a?(Regexp::Token)
7
- warn('WARNING: Root.new without a Token argument is deprecated and '\
8
- 'will be removed in 2.0.0. Use Root.build for the old behavior.')
9
- return super(self.class.build_token, *args)
10
- end
11
- super
12
- end
13
-
14
- class << self
15
- def build(options = {})
16
- new(build_token, options)
17
- end
18
-
19
- def build_token
20
- Regexp::Token.new(:expression, :root, '', 0)
21
- end
3
+ def self.build(options = {})
4
+ warn "`#{self.class}.build(options)` is deprecated and will raise in "\
5
+ "regexp_parser v3.0.0. Please use `.construct(options: options)`."
6
+ construct(options: options)
22
7
  end
23
8
  end
24
9
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -7,11 +6,11 @@ module Regexp::Expression
7
6
  end
8
7
 
9
8
  def name
10
- text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
9
+ text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
10
  end
12
11
 
13
12
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
13
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
14
  end
16
15
  end
17
16
 
@@ -117,4 +116,7 @@ module Regexp::Expression
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
118
 
119
+ # alias for symmetry between token symbol and Expression class name
120
+ Property = UnicodeProperty
121
+ Nonproperty = UnicodeProperty
120
122
  end # module Regexp::Expression
@@ -0,0 +1,41 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods
4
+ # Convenience method to init a valid Expression without a Regexp::Token
5
+ def construct(params = {})
6
+ attrs = construct_defaults.merge(params)
7
+ options = attrs.delete(:options)
8
+ token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
9
+ token = Regexp::Token.new(*token_args)
10
+ raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
11
+
12
+ new(token, options)
13
+ end
14
+
15
+ def construct_defaults
16
+ if self == Root
17
+ { type: :expression, token: :root, ts: 0 }
18
+ elsif self < Sequence
19
+ { type: :expression, token: :sequence }
20
+ else
21
+ { type: token_class::Type }
22
+ end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
23
+ end
24
+
25
+ def token_class
26
+ if self == Root || self < Sequence
27
+ nil # no token class because these objects are Parser-generated
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
+ Regexp::Syntax::Token::Meta
31
+ else
32
+ Regexp::Syntax::Token.const_get(name.split('::')[2])
33
+ end
34
+ end
35
+ end
36
+
37
+ def token_class
38
+ self.class.token_class
39
+ end
40
+ end
41
+ end
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation, e.g. "atomic group", "hex escape", "word type", ..
4
+ def human_name
5
+ [token, type].compact.join(' ').tr('_', ' ')
6
+ end
7
+ end
8
+
9
+ Alternation.class_eval { def human_name; 'alternation' end }
10
+ Alternative.class_eval { def human_name; 'alternative' end }
11
+ Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
12
+ Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
13
+ Anchor::EOL.class_eval { def human_name; 'end of line' end }
14
+ Anchor::EOS.class_eval { def human_name; 'end of string' end }
15
+ Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
16
+ Anchor::MatchStart.class_eval { def human_name; 'match start' end }
17
+ Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
18
+ Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
19
+ Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
20
+ Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
21
+ Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
22
+ Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
23
+ Backreference::Name.class_eval { def human_name; 'backreference by name' end }
24
+ Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
25
+ Backreference::Number.class_eval { def human_name; 'backreference' end }
26
+ Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
27
+ Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
28
+ Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
29
+ CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
30
+ CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
31
+ CharacterSet::Range.class_eval { def human_name; 'character range' end }
32
+ CharacterType::Any.class_eval { def human_name; 'match-all' end }
33
+ Comment.class_eval { def human_name; 'comment' end }
34
+ Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
35
+ Conditional::Condition.class_eval { def human_name; 'condition' end }
36
+ Conditional::Expression.class_eval { def human_name; 'conditional' end }
37
+ Group::Capture.class_eval { def human_name; "capture group #{number}" end }
38
+ Group::Named.class_eval { def human_name; 'named capture group' end }
39
+ Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
40
+ Literal.class_eval { def human_name; 'literal' end }
41
+ Root.class_eval { def human_name; 'root' end }
42
+ WhiteSpace.class_eval { def human_name; 'free space' end }
43
+ end
@@ -10,7 +10,7 @@ class Regexp::MatchLength
10
10
  self.exp_class = exp.class
11
11
  self.min_rep = exp.repetitions.min
12
12
  self.max_rep = exp.repetitions.max
13
- if base = opts[:base]
13
+ if (base = opts[:base])
14
14
  self.base_min = base
15
15
  self.base_max = base
16
16
  self.reify = ->{ '.' * base }
@@ -32,7 +32,7 @@ class Regexp::MatchLength
32
32
  end
33
33
  end
34
34
 
35
- def endless_each(&block)
35
+ def endless_each
36
36
  return enum_for(__method__) unless block_given?
37
37
  (min..max).each { |num| yield(num) if include?(num) }
38
38
  end
@@ -63,16 +63,20 @@ class Regexp::MatchLength
63
63
  end
64
64
 
65
65
  def to_re
66
- "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
66
+ /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
67
67
  end
68
68
 
69
69
  private
70
70
 
71
71
  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
72
 
73
- def test_regexp
74
- @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
- regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
73
+ if Regexp.method_defined?(:match?) # ruby >= 2.4
74
+ def test_regexp
75
+ @test_regexp ||= /^#{to_re}$/
76
+ end
77
+ else
78
+ def test_regexp
79
+ @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
76
80
  end
77
81
  end
78
82
  end
@@ -112,7 +116,7 @@ module Regexp::Expression
112
116
  end
113
117
 
114
118
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
119
+ dummy = Regexp::Expression::Root.construct
116
120
  dummy.expressions = expressions.map(&:clone)
117
121
  dummy.quantifier = quantifier && quantifier.clone
118
122
  dummy.match_length
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,51 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ #
99
+ # When changing the conditions, please make sure to update
100
+ # #pretty_print_instance_variables so that it includes all relevant values.
101
+ def ==(other)
102
+ self.class == other.class &&
103
+ text == other.text &&
104
+ quantifier == other.quantifier &&
105
+ options == other.options &&
106
+ (terminal? || expressions == other.expressions)
107
+ end
108
+ alias :=== :==
109
+ alias :eql? :==
110
+
111
+ def optional?
112
+ quantified? && quantifier.min == 0
113
+ end
114
+
115
+ def quantified?
116
+ !quantifier.nil?
117
+ end
96
118
  end
119
+
120
+ Shared.class_eval { def terminal?; self.class.terminal? end }
121
+ Shared::ClassMethods.class_eval { def terminal?; true end }
122
+ Subexpression.instance_eval { def terminal?; false end }
123
+
124
+ Shared.class_eval { def capturing?; self.class.capturing? end }
125
+ Shared::ClassMethods.class_eval { def capturing?; false end }
126
+ Group::Capture.instance_eval { def capturing?; true end }
127
+
128
+ Shared.class_eval { def comment?; self.class.comment? end }
129
+ Shared::ClassMethods.class_eval { def comment?; false end }
130
+ Comment.instance_eval { def comment?; true end }
131
+ Group::Comment.instance_eval { def comment?; true end }
132
+
133
+ Shared.class_eval { def decorative?; self.class.decorative? end }
134
+ Shared::ClassMethods.class_eval { def decorative?; false end }
135
+ FreeSpace.instance_eval { def decorative?; true end }
136
+ Group::Comment.instance_eval { def decorative?; true end }
137
+
138
+ Shared.class_eval { def referential?; self.class.referential? end }
139
+ Shared::ClassMethods.class_eval { def referential?; false end }
140
+ Backreference::Base.instance_eval { def referential?; true end }
141
+ Conditional::Condition.instance_eval { def referential?; true end }
142
+ Conditional::Expression.instance_eval { def referential?; true end }
97
143
  end
@@ -1,6 +1,22 @@
1
1
  module Regexp::Expression
2
2
  class Subexpression < Regexp::Expression::Base
3
3
 
4
+ # Traverses the expression, passing each recursive child to the
5
+ # given block.
6
+ # If the block takes two arguments, the indices of the children within
7
+ # their parents are also passed to it.
8
+ def each_expression(include_self = false, &block)
9
+ return enum_for(__method__, include_self) unless block
10
+
11
+ if block.arity == 1
12
+ block.call(self) if include_self
13
+ each_expression_without_index(&block)
14
+ else
15
+ block.call(self, 0) if include_self
16
+ each_expression_with_index(&block)
17
+ end
18
+ end
19
+
4
20
  # Traverses the subexpression (depth-first, pre-order) and calls the given
5
21
  # block for each expression with three arguments; the traversal event,
6
22
  # the expression, and the index of the expression within its parent.
@@ -34,31 +50,31 @@ module Regexp::Expression
34
50
  end
35
51
  alias :walk :traverse
36
52
 
37
- # Iterates over the expressions of this expression as an array, passing
38
- # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false, &block)
40
- return enum_for(__method__, include_self) unless block_given?
41
-
42
- traverse(include_self) do |event, exp, index|
43
- yield(exp, index) unless event == :exit
44
- end
45
- end
46
-
47
53
  # Returns a new array with the results of calling the given block once
48
54
  # for every expression. If a block is not given, returns an array with
49
55
  # each expression and its level index as an array.
50
56
  def flat_map(include_self = false, &block)
51
- result = []
57
+ case block && block.arity
58
+ when nil then each_expression(include_self).to_a
59
+ when 2 then each_expression(include_self).map(&block)
60
+ else each_expression(include_self).map { |exp| block.call(exp) }
61
+ end
62
+ end
52
63
 
53
- each_expression(include_self) do |exp, index|
54
- if block_given?
55
- result << yield(exp, index)
56
- else
57
- result << [exp, index]
58
- end
64
+ protected
65
+
66
+ def each_expression_with_index(&block)
67
+ each_with_index do |exp, index|
68
+ block.call(exp, index)
69
+ exp.each_expression_with_index(&block) unless exp.terminal?
59
70
  end
71
+ end
60
72
 
61
- result
73
+ def each_expression_without_index(&block)
74
+ each do |exp|
75
+ block.call(exp)
76
+ exp.each_expression_without_index(&block) unless exp.terminal?
77
+ end
62
78
  end
63
79
  end
64
80
  end