regexp_parser 1.7.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +9 -3
  3. data/LICENSE +1 -1
  4. data/Rakefile +6 -70
  5. data/lib/regexp_parser/error.rb +4 -0
  6. data/lib/regexp_parser/expression/base.rb +76 -0
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  11. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
  12. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  15. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  16. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  17. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  18. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  19. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
  20. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  21. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
  22. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  23. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  24. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  25. data/lib/regexp_parser/expression/methods/negative.rb +20 -0
  26. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  27. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  28. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  29. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  30. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  31. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  32. data/lib/regexp_parser/expression/sequence.rb +11 -47
  33. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  34. data/lib/regexp_parser/expression/shared.rb +111 -0
  35. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  36. data/lib/regexp_parser/expression.rb +15 -141
  37. data/lib/regexp_parser/lexer.rb +83 -41
  38. data/lib/regexp_parser/parser.rb +372 -429
  39. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  40. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  41. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  42. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  43. data/lib/regexp_parser/scanner/properties/long.csv +651 -0
  44. data/lib/regexp_parser/scanner/properties/short.csv +249 -0
  45. data/lib/regexp_parser/scanner/property.rl +4 -4
  46. data/lib/regexp_parser/scanner/scanner.rl +303 -368
  47. data/lib/regexp_parser/scanner.rb +1423 -1674
  48. data/lib/regexp_parser/syntax/any.rb +2 -7
  49. data/lib/regexp_parser/syntax/base.rb +92 -67
  50. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  51. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  52. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  53. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  54. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  55. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  56. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  57. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  58. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  59. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  60. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  61. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  62. data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
  63. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  64. data/lib/regexp_parser/syntax/token.rb +45 -0
  65. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  66. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  67. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  68. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  69. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  70. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  71. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  74. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  75. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  79. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions.rb +3 -1
  81. data/lib/regexp_parser/syntax.rb +8 -6
  82. data/lib/regexp_parser/token.rb +9 -20
  83. data/lib/regexp_parser/version.rb +1 -1
  84. data/lib/regexp_parser.rb +0 -2
  85. data/regexp_parser.gemspec +19 -23
  86. metadata +53 -171
  87. data/CHANGELOG.md +0 -349
  88. data/README.md +0 -470
  89. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  90. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  91. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  92. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  93. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  94. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  95. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  96. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  97. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  98. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  99. data/spec/expression/base_spec.rb +0 -94
  100. data/spec/expression/clone_spec.rb +0 -120
  101. data/spec/expression/conditional_spec.rb +0 -89
  102. data/spec/expression/free_space_spec.rb +0 -27
  103. data/spec/expression/methods/match_length_spec.rb +0 -161
  104. data/spec/expression/methods/match_spec.rb +0 -25
  105. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  106. data/spec/expression/methods/tests_spec.rb +0 -99
  107. data/spec/expression/methods/traverse_spec.rb +0 -161
  108. data/spec/expression/options_spec.rb +0 -128
  109. data/spec/expression/root_spec.rb +0 -9
  110. data/spec/expression/sequence_spec.rb +0 -9
  111. data/spec/expression/subexpression_spec.rb +0 -50
  112. data/spec/expression/to_h_spec.rb +0 -26
  113. data/spec/expression/to_s_spec.rb +0 -100
  114. data/spec/lexer/all_spec.rb +0 -22
  115. data/spec/lexer/conditionals_spec.rb +0 -53
  116. data/spec/lexer/escapes_spec.rb +0 -14
  117. data/spec/lexer/keep_spec.rb +0 -10
  118. data/spec/lexer/literals_spec.rb +0 -89
  119. data/spec/lexer/nesting_spec.rb +0 -99
  120. data/spec/lexer/refcalls_spec.rb +0 -55
  121. data/spec/parser/all_spec.rb +0 -43
  122. data/spec/parser/alternation_spec.rb +0 -88
  123. data/spec/parser/anchors_spec.rb +0 -17
  124. data/spec/parser/conditionals_spec.rb +0 -179
  125. data/spec/parser/errors_spec.rb +0 -30
  126. data/spec/parser/escapes_spec.rb +0 -121
  127. data/spec/parser/free_space_spec.rb +0 -130
  128. data/spec/parser/groups_spec.rb +0 -108
  129. data/spec/parser/keep_spec.rb +0 -6
  130. data/spec/parser/posix_classes_spec.rb +0 -8
  131. data/spec/parser/properties_spec.rb +0 -115
  132. data/spec/parser/quantifiers_spec.rb +0 -51
  133. data/spec/parser/refcalls_spec.rb +0 -112
  134. data/spec/parser/set/intersections_spec.rb +0 -127
  135. data/spec/parser/set/ranges_spec.rb +0 -111
  136. data/spec/parser/sets_spec.rb +0 -178
  137. data/spec/parser/types_spec.rb +0 -18
  138. data/spec/scanner/all_spec.rb +0 -18
  139. data/spec/scanner/anchors_spec.rb +0 -21
  140. data/spec/scanner/conditionals_spec.rb +0 -128
  141. data/spec/scanner/errors_spec.rb +0 -68
  142. data/spec/scanner/escapes_spec.rb +0 -53
  143. data/spec/scanner/free_space_spec.rb +0 -133
  144. data/spec/scanner/groups_spec.rb +0 -52
  145. data/spec/scanner/keep_spec.rb +0 -10
  146. data/spec/scanner/literals_spec.rb +0 -49
  147. data/spec/scanner/meta_spec.rb +0 -18
  148. data/spec/scanner/properties_spec.rb +0 -64
  149. data/spec/scanner/quantifiers_spec.rb +0 -20
  150. data/spec/scanner/refcalls_spec.rb +0 -36
  151. data/spec/scanner/sets_spec.rb +0 -102
  152. data/spec/scanner/types_spec.rb +0 -14
  153. data/spec/spec_helper.rb +0 -15
  154. data/spec/support/runner.rb +0 -42
  155. data/spec/support/shared_examples.rb +0 -77
  156. data/spec/support/warning_extractor.rb +0 -60
  157. data/spec/syntax/syntax_spec.rb +0 -48
  158. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  159. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  160. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  161. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  162. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  163. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  164. data/spec/syntax/versions/aliases_spec.rb +0 -37
  165. data/spec/token/token_spec.rb +0 -85
  166. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -0,0 +1,43 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation, e.g. "atomic group", "hex escape", "word type", ..
4
+ def human_name
5
+ [token, type].compact.join(' ').tr('_', ' ')
6
+ end
7
+ end
8
+
9
+ Alternation.class_eval { def human_name; 'alternation' end }
10
+ Alternative.class_eval { def human_name; 'alternative' end }
11
+ Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
12
+ Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
13
+ Anchor::EOL.class_eval { def human_name; 'end of line' end }
14
+ Anchor::EOS.class_eval { def human_name; 'end of string' end }
15
+ Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
16
+ Anchor::MatchStart.class_eval { def human_name; 'match start' end }
17
+ Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
18
+ Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
19
+ Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
20
+ Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
21
+ Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
22
+ Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
23
+ Backreference::Name.class_eval { def human_name; 'backreference by name' end }
24
+ Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
25
+ Backreference::Number.class_eval { def human_name; 'backreference' end }
26
+ Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
27
+ Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
28
+ Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
29
+ CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
30
+ CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
31
+ CharacterSet::Range.class_eval { def human_name; 'character range' end }
32
+ CharacterType::Any.class_eval { def human_name; 'match-all' end }
33
+ Comment.class_eval { def human_name; 'comment' end }
34
+ Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
35
+ Conditional::Condition.class_eval { def human_name; 'condition' end }
36
+ Conditional::Expression.class_eval { def human_name; 'conditional' end }
37
+ Group::Capture.class_eval { def human_name; "capture group #{number}" end }
38
+ Group::Named.class_eval { def human_name; 'named capture group' end }
39
+ Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
40
+ Literal.class_eval { def human_name; 'literal' end }
41
+ Root.class_eval { def human_name; 'root' end }
42
+ WhiteSpace.class_eval { def human_name; 'free space' end }
43
+ end
@@ -10,7 +10,7 @@ class Regexp::MatchLength
10
10
  self.exp_class = exp.class
11
11
  self.min_rep = exp.repetitions.min
12
12
  self.max_rep = exp.repetitions.max
13
- if base = opts[:base]
13
+ if (base = opts[:base])
14
14
  self.base_min = base
15
15
  self.base_max = base
16
16
  self.reify = ->{ '.' * base }
@@ -32,7 +32,7 @@ class Regexp::MatchLength
32
32
  end
33
33
  end
34
34
 
35
- def endless_each(&block)
35
+ def endless_each
36
36
  return enum_for(__method__) unless block_given?
37
37
  (min..max).each { |num| yield(num) if include?(num) }
38
38
  end
@@ -63,16 +63,20 @@ class Regexp::MatchLength
63
63
  end
64
64
 
65
65
  def to_re
66
- "(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}"
66
+ /(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
67
67
  end
68
68
 
69
69
  private
70
70
 
71
71
  attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
72
72
 
73
- def test_regexp
74
- @test_regexp ||= Regexp.new("^#{to_re}$").tap do |regexp|
75
- regexp.respond_to?(:match?) || def regexp.match?(str); !!match(str) end
73
+ if Regexp.method_defined?(:match?) # ruby >= 2.4
74
+ def test_regexp
75
+ @test_regexp ||= /^#{to_re}$/
76
+ end
77
+ else
78
+ def test_regexp
79
+ @test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
76
80
  end
77
81
  end
78
82
  end
@@ -112,7 +116,7 @@ module Regexp::Expression
112
116
  end
113
117
 
114
118
  def inner_match_length
115
- dummy = Regexp::Expression::Root.build
119
+ dummy = Regexp::Expression::Root.construct
116
120
  dummy.expressions = expressions.map(&:clone)
117
121
  dummy.quantifier = quantifier && quantifier.clone
118
122
  dummy.match_length
@@ -0,0 +1,20 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def negative?
4
+ false
5
+ end
6
+
7
+ # not an alias so as to respect overrides of #negative?
8
+ def negated?
9
+ negative?
10
+ end
11
+ end
12
+
13
+ Anchor::NonWordBoundary.class_eval { def negative?; true end }
14
+ Assertion::NegativeLookahead.class_eval { def negative?; true end }
15
+ Assertion::NegativeLookbehind.class_eval { def negative?; true end }
16
+ CharacterSet.class_eval { def negative?; negative end }
17
+ CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
18
+ PosixClass.class_eval { def negative?; type == :nonposixclass end }
19
+ UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
20
+ end
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- class Base
2
+ module Shared
3
3
 
4
4
  # Test if this expression has the given test_type, which can be either
5
5
  # a symbol or an array of symbols to check against the expression's type.
@@ -93,5 +93,51 @@ module Regexp::Expression
93
93
  "Array, Hash, or Symbol expected, #{scope.class.name} given"
94
94
  end
95
95
  end
96
+
97
+ # Deep-compare two expressions for equality.
98
+ #
99
+ # When changing the conditions, please make sure to update
100
+ # #pretty_print_instance_variables so that it includes all relevant values.
101
+ def ==(other)
102
+ self.class == other.class &&
103
+ text == other.text &&
104
+ quantifier == other.quantifier &&
105
+ options == other.options &&
106
+ (terminal? || expressions == other.expressions)
107
+ end
108
+ alias :=== :==
109
+ alias :eql? :==
110
+
111
+ def optional?
112
+ quantified? && quantifier.min == 0
113
+ end
114
+
115
+ def quantified?
116
+ !quantifier.nil?
117
+ end
96
118
  end
119
+
120
+ Shared.class_eval { def terminal?; self.class.terminal? end }
121
+ Shared::ClassMethods.class_eval { def terminal?; true end }
122
+ Subexpression.instance_eval { def terminal?; false end }
123
+
124
+ Shared.class_eval { def capturing?; self.class.capturing? end }
125
+ Shared::ClassMethods.class_eval { def capturing?; false end }
126
+ Group::Capture.instance_eval { def capturing?; true end }
127
+
128
+ Shared.class_eval { def comment?; self.class.comment? end }
129
+ Shared::ClassMethods.class_eval { def comment?; false end }
130
+ Comment.instance_eval { def comment?; true end }
131
+ Group::Comment.instance_eval { def comment?; true end }
132
+
133
+ Shared.class_eval { def decorative?; self.class.decorative? end }
134
+ Shared::ClassMethods.class_eval { def decorative?; false end }
135
+ FreeSpace.instance_eval { def decorative?; true end }
136
+ Group::Comment.instance_eval { def decorative?; true end }
137
+
138
+ Shared.class_eval { def referential?; self.class.referential? end }
139
+ Shared::ClassMethods.class_eval { def referential?; false end }
140
+ Backreference::Base.instance_eval { def referential?; true end }
141
+ Conditional::Condition.instance_eval { def referential?; true end }
142
+ Conditional::Expression.instance_eval { def referential?; true end }
97
143
  end
@@ -1,6 +1,22 @@
1
1
  module Regexp::Expression
2
2
  class Subexpression < Regexp::Expression::Base
3
3
 
4
+ # Traverses the expression, passing each recursive child to the
5
+ # given block.
6
+ # If the block takes two arguments, the indices of the children within
7
+ # their parents are also passed to it.
8
+ def each_expression(include_self = false, &block)
9
+ return enum_for(__method__, include_self) unless block
10
+
11
+ if block.arity == 1
12
+ block.call(self) if include_self
13
+ each_expression_without_index(&block)
14
+ else
15
+ block.call(self, 0) if include_self
16
+ each_expression_with_index(&block)
17
+ end
18
+ end
19
+
4
20
  # Traverses the subexpression (depth-first, pre-order) and calls the given
5
21
  # block for each expression with three arguments; the traversal event,
6
22
  # the expression, and the index of the expression within its parent.
@@ -34,31 +50,31 @@ module Regexp::Expression
34
50
  end
35
51
  alias :walk :traverse
36
52
 
37
- # Iterates over the expressions of this expression as an array, passing
38
- # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false, &block)
40
- return enum_for(__method__, include_self) unless block_given?
41
-
42
- traverse(include_self) do |event, exp, index|
43
- yield(exp, index) unless event == :exit
44
- end
45
- end
46
-
47
53
  # Returns a new array with the results of calling the given block once
48
54
  # for every expression. If a block is not given, returns an array with
49
55
  # each expression and its level index as an array.
50
56
  def flat_map(include_self = false, &block)
51
- result = []
57
+ case block && block.arity
58
+ when nil then each_expression(include_self).to_a
59
+ when 2 then each_expression(include_self).map(&block)
60
+ else each_expression(include_self).map { |exp| block.call(exp) }
61
+ end
62
+ end
52
63
 
53
- each_expression(include_self) do |exp, index|
54
- if block_given?
55
- result << yield(exp, index)
56
- else
57
- result << [exp, index]
58
- end
64
+ protected
65
+
66
+ def each_expression_with_index(&block)
67
+ each_with_index do |exp, index|
68
+ block.call(exp, index)
69
+ exp.each_expression_with_index(&block) unless exp.terminal?
59
70
  end
71
+ end
60
72
 
61
- result
73
+ def each_expression_without_index(&block)
74
+ each do |exp|
75
+ block.call(exp)
76
+ exp.each_expression_without_index(&block) unless exp.terminal?
77
+ end
62
78
  end
63
79
  end
64
80
  end
@@ -1,26 +1,20 @@
1
1
  module Regexp::Expression
2
+ # TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
3
+ # call super in #initialize, but raise in #quantifier= and #quantify,
4
+ # or introduce an Expression::Quantifiable intermediate class.
5
+ # Or actually allow chaining as a more concise but tricky solution than PR#69.
2
6
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
7
+ include Regexp::Expression::Shared
4
8
 
5
- attr_reader :token, :text, :min, :max, :mode
9
+ MODES = %i[greedy possessive reluctant]
6
10
 
7
- def initialize(token, text, min, max, mode)
8
- @token = token
9
- @text = text
10
- @mode = mode
11
- @min = min
12
- @max = max
13
- end
14
-
15
- def initialize_clone(orig)
16
- @text = orig.text.dup
17
- super
18
- end
11
+ def initialize(*args)
12
+ deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
19
13
 
20
- def to_s
21
- text.dup
14
+ init_from_token_and_options(*args)
15
+ # TODO: remove in v3.0.0, stop removing parts of #token (?)
16
+ self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
22
17
  end
23
- alias :to_str :to_s
24
18
 
25
19
  def to_h
26
20
  {
@@ -40,5 +34,51 @@ module Regexp::Expression
40
34
  RUBY
41
35
  end
42
36
  alias :lazy? :reluctant?
37
+
38
+ def min
39
+ derived_data[:min]
40
+ end
41
+
42
+ def max
43
+ derived_data[:max]
44
+ end
45
+
46
+ def mode
47
+ derived_data[:mode]
48
+ end
49
+
50
+ private
51
+
52
+ def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
53
+ warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
54
+ "is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
55
+ "Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
56
+ "with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
57
+ "will be derived automatically.\n"\
58
+ "Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
59
+ "This is consistent with how Expression::Base instances are created. "
60
+ @token = token
61
+ @text = text
62
+ end
63
+
64
+ def derived_data
65
+ @derived_data ||= begin
66
+ min, max =
67
+ case text[0]
68
+ when '?'; [0, 1]
69
+ when '*'; [0, -1]
70
+ when '+'; [1, -1]
71
+ else
72
+ int_min = text[/\{(\d*)/, 1]
73
+ int_max = text[/,?(\d*)\}/, 1]
74
+ [int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
75
+ end
76
+
77
+ mod = text[/.([?+])/, 1]
78
+ mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
79
+
80
+ { min: min, max: max, mode: mode }
81
+ end
82
+ end
43
83
  end
44
84
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpressions by how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -7,61 +6,26 @@ module Regexp::Expression
7
6
  # Used as the base class for the Alternation alternatives, Conditional
8
7
  # branches, and CharacterSet::Intersection intersected sequences.
9
8
  class Sequence < Regexp::Expression::Subexpression
10
- # TODO: this override is here for backwards compatibility, remove in 2.0.0
11
- def initialize(*args)
12
- if args.count == 3
13
- warn('WARNING: Sequence.new without a Regexp::Token argument is '\
14
- 'deprecated and will be removed in 2.0.0.')
15
- return self.class.at_levels(*args)
16
- end
17
- super
18
- end
19
-
20
9
  class << self
21
- def add_to(subexpression, params = {}, active_opts = {})
22
- sequence = at_levels(
23
- subexpression.level,
24
- subexpression.set_level,
25
- params[:conditional_level] || subexpression.conditional_level
10
+ def add_to(exp, params = {}, active_opts = {})
11
+ sequence = construct(
12
+ level: exp.level,
13
+ set_level: exp.set_level,
14
+ conditional_level: params[:conditional_level] || exp.conditional_level,
15
+ ts: params[:ts],
26
16
  )
27
- sequence.nesting_level = subexpression.nesting_level + 1
28
17
  sequence.options = active_opts
29
- subexpression.expressions << sequence
18
+ exp.expressions << sequence
30
19
  sequence
31
20
  end
32
-
33
- def at_levels(level, set_level, conditional_level)
34
- token = Regexp::Token.new(
35
- :expression,
36
- :sequence,
37
- '',
38
- nil, # ts
39
- nil, # te
40
- level,
41
- set_level,
42
- conditional_level
43
- )
44
- new(token)
45
- end
46
21
  end
47
22
 
48
- def starts_at
49
- expressions.first.starts_at
23
+ def ts
24
+ (head = expressions.first) ? head.ts : @ts
50
25
  end
51
- alias :ts :starts_at
52
-
53
- def quantify(token, text, min = nil, max = nil, mode = :greedy)
54
- offset = -1
55
- target = expressions[offset]
56
- while target.is_a?(FreeSpace)
57
- target = expressions[offset -= 1]
58
- end
59
26
 
60
- target || raise(ArgumentError, "No valid target found for '#{text}' "\
61
- 'quantifier')
62
-
63
- target.quantify(token, text, min, max, mode)
27
+ def quantify(token, *args)
28
+ extract_quantifier_target(token.text).quantify(token, *args)
64
29
  end
65
30
  end
66
-
67
31
  end
@@ -5,21 +5,16 @@ module Regexp::Expression
5
5
  alias :operands :expressions
6
6
  alias :operator :text
7
7
 
8
- def starts_at
9
- expressions.first.starts_at
8
+ def ts
9
+ (head = expressions.first) ? head.ts : @ts
10
10
  end
11
- alias :ts :starts_at
12
11
 
13
12
  def <<(exp)
14
13
  expressions.last << exp
15
14
  end
16
15
 
17
- def add_sequence(active_opts = {})
18
- self.class::OPERAND.add_to(self, {}, active_opts)
19
- end
20
-
21
- def to_s(format = :full)
22
- sequences.map { |e| e.to_s(format) }.join(text)
16
+ def add_sequence(active_opts = {}, params = { ts: 0 })
17
+ self.class::OPERAND.add_to(self, params, active_opts)
23
18
  end
24
19
  end
25
20
  end
@@ -0,0 +1,111 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ module ClassMethods; end # filled in ./methods/*.rb
4
+
5
+ def self.included(mod)
6
+ mod.class_eval do
7
+ extend Shared::ClassMethods
8
+
9
+ attr_accessor :type, :token, :text, :ts, :te,
10
+ :level, :set_level, :conditional_level,
11
+ :options, :parent,
12
+ :custom_to_s_handling, :pre_quantifier_decorations
13
+
14
+ attr_reader :nesting_level, :quantifier
15
+ end
16
+ end
17
+
18
+ def init_from_token_and_options(token, options = {})
19
+ self.type = token.type
20
+ self.token = token.token
21
+ self.text = token.text
22
+ self.ts = token.ts
23
+ self.te = token.te
24
+ self.level = token.level
25
+ self.set_level = token.set_level
26
+ self.conditional_level = token.conditional_level
27
+ self.nesting_level = 0
28
+ self.options = options || {}
29
+ end
30
+ private :init_from_token_and_options
31
+
32
+ def initialize_copy(orig)
33
+ self.text = orig.text.dup if orig.text
34
+ self.options = orig.options.dup if orig.options
35
+ self.quantifier = orig.quantifier.clone if orig.quantifier
36
+ self.parent = nil # updated by Subexpression#initialize_copy
37
+ if orig.pre_quantifier_decorations
38
+ self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
39
+ end
40
+ super
41
+ end
42
+
43
+ def starts_at
44
+ ts
45
+ end
46
+
47
+ def ends_at(include_quantifier = true)
48
+ ts + (include_quantifier ? full_length : base_length)
49
+ end
50
+
51
+ def base_length
52
+ to_s(:base).length
53
+ end
54
+
55
+ def full_length
56
+ to_s(:original).length
57
+ end
58
+
59
+ # #to_s reproduces the original source, as an unparser would.
60
+ #
61
+ # It takes an optional format argument.
62
+ #
63
+ # Example:
64
+ #
65
+ # lit = Regexp::Parser.parse(/a +/x)[0]
66
+ #
67
+ # lit.to_s # => 'a+' # default; with quantifier
68
+ # lit.to_s(:full) # => 'a+' # default; with quantifier
69
+ # lit.to_s(:base) # => 'a' # without quantifier
70
+ # lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
71
+ #
72
+ def to_s(format = :full)
73
+ base = parts.each_with_object(''.dup) do |part, buff|
74
+ if part.instance_of?(String)
75
+ buff << part
76
+ elsif !part.custom_to_s_handling
77
+ buff << part.to_s(:original)
78
+ end
79
+ end
80
+ "#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
81
+ end
82
+ alias :to_str :to_s
83
+
84
+ def pre_quantifier_decoration(expression_format = :original)
85
+ pre_quantifier_decorations.to_a.join if expression_format == :original
86
+ end
87
+
88
+ def quantifier_affix(expression_format = :full)
89
+ quantifier.to_s if quantified? && expression_format != :base
90
+ end
91
+
92
+ def offset
93
+ [starts_at, full_length]
94
+ end
95
+
96
+ def coded_offset
97
+ '@%d+%d' % offset
98
+ end
99
+
100
+ def nesting_level=(lvl)
101
+ @nesting_level = lvl
102
+ quantifier && quantifier.nesting_level = lvl
103
+ terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
104
+ end
105
+
106
+ def quantifier=(qtf)
107
+ @quantifier = qtf
108
+ @repetitions = nil # clear memoized value
109
+ end
110
+ end
111
+ end