regexp_parser 2.6.2 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +67 -0
  3. data/Gemfile +2 -2
  4. data/README.md +32 -29
  5. data/lib/regexp_parser/expression/base.rb +0 -7
  6. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  7. data/lib/regexp_parser/expression/classes/backreference.rb +4 -2
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  9. data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
  10. data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
  11. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  13. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  18. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  19. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  20. data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
  21. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  22. data/lib/regexp_parser/expression/sequence.rb +5 -10
  23. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  24. data/lib/regexp_parser/expression/shared.rb +37 -20
  25. data/lib/regexp_parser/expression/subexpression.rb +20 -15
  26. data/lib/regexp_parser/expression.rb +2 -0
  27. data/lib/regexp_parser/lexer.rb +76 -36
  28. data/lib/regexp_parser/parser.rb +97 -97
  29. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  30. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  31. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  32. data/lib/regexp_parser/scanner/mapping.rb +89 -0
  33. data/lib/regexp_parser/scanner/property.rl +2 -2
  34. data/lib/regexp_parser/scanner/scanner.rl +90 -169
  35. data/lib/regexp_parser/scanner.rb +1157 -1330
  36. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  37. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  38. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  39. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  40. data/lib/regexp_parser/syntax/token/unicode_property.rb +3 -0
  41. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  42. data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
  43. data/lib/regexp_parser/syntax/versions.rb +2 -0
  44. data/lib/regexp_parser/version.rb +1 -1
  45. metadata +10 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 66568005494b517613155277c6be4731eb8a26bb9b48a692a9430507286ce583
4
- data.tar.gz: d1fc6c6f1a0c7f939c51703ac844c2dbb134f96e0e55780646cb7e3e87d7a652
3
+ metadata.gz: bed928e92928d8f595241456658e516f3afd2474196ca4d6fdbb849c072d5024
4
+ data.tar.gz: 48d50057af6883cd2d67050fc05aed79e87342f6067eb80734729a8440c08a69
5
5
  SHA512:
6
- metadata.gz: b955b2215b71c94497e52841142fab8c2b9930d0d6cea6ea2b3eeb8ed9fe84575e2f34aae3a6051af2b56429f98cf070b9151805f2cb93ddb511ec1e0e50dd7c
7
- data.tar.gz: 3a4f083942b66ddb4b67ab33f14bb1c0b724a60c2b30605059d32ce3648e9cb46e31e797b7a526a2028c1e018d73365f5ef955256de4e63397d6ea105714ff12
6
+ metadata.gz: 455e79dd780d7d5c130fae56140158615195601f68ea9eb83367d0b9faaf631586bbf12f5b9243d16bb42d29eeb57ba595f87a3b4604b32af059dc9a72c4d6d4
7
+ data.tar.gz: 37216a681eda06118b7317e64cab14cb06e39e4923433225598b60b8b36684ab831e4d90960516adbfdaa16811b274c2181eb38a13ddd259fb6790cbeef99ebf
data/CHANGELOG.md CHANGED
@@ -7,6 +7,73 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
11
+
12
+ ### Added
13
+
14
+ - `Regexp::Expression::Shared#ends_at`
15
+ * e.g. `parse(/a +/x)[0].ends_at # => 3`
16
+ * e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
17
+ - `Regexp::Expression::Shared#{capturing?,comment?}`
18
+ * previously only available on capturing and comment groups
19
+ - `Regexp::Expression::Shared#{decorative?}`
20
+ * true for decorations: comment groups as well as comments and whitespace in x-mode
21
+ - `Regexp::Expression::Shared#parent`
22
+ - new format argument `:original` for `Regexp::Expression::Base#to_s`
23
+ * includes decorative elements between node and its quantifier
24
+ * e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
25
+ * using it is not needed when calling `Root#to_s` as Root can't be quantified
26
+ - support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
27
+ * in this case, only the expressions are passed to the block, no indices
28
+ - support calling test methods at Expression class level
29
+ - `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
30
+ - e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
31
+
32
+ ### Fixed
33
+
34
+ - `Regexp::Expression::Shared#full_length` with whitespace before quantifier
35
+ * e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
36
+ - `Subexpression#to_s` output with children with whitespace before their quantifier
37
+ * e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
38
+ * calling `#to_s` on sub-nodes still omits such decorative interludes by default
39
+ - use new `#to_s` format `:original` to include it
40
+ - e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
41
+ - fixed `Subexpression#te` behaving differently from other expressions
42
+ * only `Subexpression#te` used to include the quantifier
43
+ * now `#te` is the end index without quantifier, as for other expressions
44
+ - fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
45
+ * e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
46
+ * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
47
+ - fixed nested comment groups breaking local x-options
48
+ * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
49
+ - fixed nested comment groups breaking conditionals
50
+ * e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
51
+ - fixed quantifiers after comment groups being mis-assigned to that group
52
+ * e.g. in `/a(?#foo){3}/` (matches 'aaa')
53
+ - fixed Scanner accepting two cases of invalid Regexp syntax
54
+ * unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
55
+ * these are a `SyntaxError` in Ruby, so could only be passed as a String
56
+ * they now raise a `Regexp::Scanner::ScannerError`
57
+ - fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
58
+ - reduced verbosity of inspect / pretty print output
59
+
60
+ ## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
61
+
62
+ ### Added
63
+
64
+ - `Regexp::Lexer.lex` now streams tokens when called with a block
65
+ * it can now take arbitrarily large input, just like `Regexp::Scanner`
66
+ * this also slightly improves `Regexp::Parser.parse` performance
67
+ * note: `Regexp::Parser.parse` still does not and will not support streaming
68
+ - improved performance of `Subexpression#each_expression`
69
+ - minor improvements to `Regexp::Scanner` performance
70
+ - overall improvement of parse performance: about 10% for large Regexps
71
+
72
+ ### Fixed
73
+
74
+ - parsing of octal escape sequences in sets, e.g. `[\141]`
75
+ * thanks to [Randy Stauner](https://github.com/rwstauner) for the report
76
+
10
77
  ## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
11
78
 
12
79
  ### Fixed
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
6
+ gem 'leto', '~> 2.0'
7
7
  gem 'rake', '~> 13.0'
8
8
  gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
- gem 'gouteur'
12
+ gem 'gouteur', '~> 1.1'
13
13
  gem 'rubocop', '~> 1.7'
14
14
  end
15
15
  end
data/README.md CHANGED
@@ -67,7 +67,7 @@ called with the results as follows:
67
67
  * **Scanner**: the block gets passed the results as they are scanned. See the
68
68
  example in the next section for details.
69
69
 
70
- * **Lexer**: after completion, the block gets passed the tokens one by one.
70
+ * **Lexer**: the block gets passed the tokens one by one as they are scanned.
71
71
  _The result of the block is returned._
72
72
 
73
73
  * **Parser**: after completion, the block gets passed the root expression.
@@ -126,7 +126,7 @@ parts of the pattern:
126
126
 
127
127
  ```ruby
128
128
  Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
129
- #=> ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
129
+ # => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
130
130
  ```
131
131
 
132
132
 
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
248
248
 
249
249
  ```ruby
250
250
  Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
251
- #=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
251
+ # => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
252
252
  ```
253
253
 
254
254
  #### Notes
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
262
262
  ### Parser
263
263
  Sits on top of the lexer and transforms the "stream" of Token objects emitted
264
264
  by it into a tree of Expression objects represented by an instance of the
265
- Expression::Root class.
265
+ `Expression::Root` class.
266
266
 
267
267
  See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
268
268
  wiki page for attributes and methods.
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
270
270
 
271
271
  #### Example
272
272
 
273
+ This example uses the tree traversal method `#each_expression`
274
+ and the method `#strfregexp` to print each object in the tree.
275
+
276
+ ```ruby
277
+ include_root = true
278
+ indent_offset = include_root ? 1 : 0
279
+
280
+ tree.each_expression(include_root) do |exp|
281
+ puts exp.strfregexp("%>> %c", indent_offset)
282
+ end
283
+
284
+ # Output
285
+ # > Regexp::Expression::Root
286
+ # > Regexp::Expression::Literal
287
+ # > Regexp::Expression::Group::Capture
288
+ # > Regexp::Expression::Literal
289
+ # > Regexp::Expression::Group::Capture
290
+ # > Regexp::Expression::Literal
291
+ # > Regexp::Expression::Literal
292
+ # > Regexp::Expression::Group::Named
293
+ # > Regexp::Expression::CharacterSet
294
+ ```
295
+
296
+ _Note: quantifiers do not appear in the output because they are members of the
297
+ Expression class. See the next section for details._
298
+
299
+ Another example, using `#traverse` for a more fine-grained tree traversal:
300
+
273
301
  ```ruby
274
302
  require 'regexp_parser'
275
303
 
@@ -295,34 +323,9 @@ end
295
323
  # exit: group `(?<name>[0-9]+)`
296
324
  ```
297
325
 
298
- Another example, using each_expression and strfregexp to print the object tree.
299
326
  _See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
300
327
  for more information on these methods._
301
328
 
302
- ```ruby
303
- include_root = true
304
- indent_offset = include_root ? 1 : 0
305
-
306
- tree.each_expression(include_root) do |exp, level_index|
307
- puts exp.strfregexp("%>> %c", indent_offset)
308
- end
309
-
310
- # Output
311
- # > Regexp::Expression::Root
312
- # > Regexp::Expression::Literal
313
- # > Regexp::Expression::Group::Capture
314
- # > Regexp::Expression::Literal
315
- # > Regexp::Expression::Group::Capture
316
- # > Regexp::Expression::Literal
317
- # > Regexp::Expression::Literal
318
- # > Regexp::Expression::Group::Named
319
- # > Regexp::Expression::CharacterSet
320
- ```
321
-
322
- _Note: quantifiers do not appear in the output because they are members of the
323
- Expression class. See the next section for details._
324
-
325
-
326
329
  ---
327
330
 
328
331
 
data/lib/regexp_parser/expression/base.rb CHANGED
@@ -6,13 +6,6 @@ module Regexp::Expression
6
6
  init_from_token_and_options(token, options)
7
7
  end
8
8
 
9
- def initialize_copy(orig)
10
- self.text = orig.text.dup if orig.text
11
- self.options = orig.options.dup if orig.options
12
- self.quantifier = orig.quantifier.clone if orig.quantifier
13
- super
14
- end
15
-
16
9
  def to_re(format = :full)
17
10
  if set_level > 0
18
11
  warn "Calling #to_re on character set members is deprecated - "\
data/lib/regexp_parser/expression/classes/alternation.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
data/lib/regexp_parser/expression/classes/backreference.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :backref, one way or the other, in v3.0.0
3
2
  module Backreference
4
3
  class Base < Regexp::Expression::Base
5
4
  attr_accessor :referenced_expression
@@ -27,7 +26,7 @@ module Regexp::Expression
27
26
  alias reference number
28
27
 
29
28
  def initialize(token, options = {})
30
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
31
30
  super
32
31
  end
33
32
  end
@@ -70,4 +69,7 @@ module Regexp::Expression
70
69
  end
71
70
  end
72
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
73
75
  end
data/lib/regexp_parser/expression/classes/character_set/range.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def parts
20
- intersperse(expressions, text.dup)
21
- end
22
17
  end
23
18
  end
24
19
  end
data/lib/regexp_parser/expression/classes/character_set.rb CHANGED
@@ -19,9 +19,8 @@ module Regexp::Expression
19
19
  def close
20
20
  self.closed = true
21
21
  end
22
-
23
- def parts
24
- ["#{text}#{'^' if negated?}", *expressions, ']']
25
- end
26
22
  end
23
+
24
+ # alias for symmetry between token symbol and Expression class name
25
+ Set = CharacterSet
27
26
  end # module Regexp::Expression
data/lib/regexp_parser/expression/classes/conditional.rb CHANGED
@@ -31,9 +31,9 @@ module Regexp::Expression
31
31
  expressions.last << exp
32
32
  end
33
33
 
34
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
35
35
  raise TooManyBranches.new if branches.length == 2
36
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
37
37
  Branch.add_to(self, params, active_opts)
38
38
  end
39
39
  alias :branch :add_sequence
@@ -55,10 +55,6 @@ module Regexp::Expression
55
55
  condition.reference
56
56
  end
57
57
 
58
- def parts
59
- [text.dup, condition, *intersperse(branches, '|'), ')']
60
- end
61
-
62
58
  def initialize_copy(orig)
63
59
  self.referenced_expression = orig.referenced_expression.dup
64
60
  super
data/lib/regexp_parser/expression/classes/escape_sequence.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
2
  module EscapeSequence
4
3
  class Base < Regexp::Expression::Base
5
4
  def codepoint
@@ -97,4 +96,7 @@ module Regexp::Expression
97
96
  end
98
97
  end
99
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
100
102
  end
data/lib/regexp_parser/expression/classes/free_space.rb CHANGED
@@ -5,10 +5,12 @@ module Regexp::Expression
5
5
  end
6
6
  end
7
7
 
8
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
9
10
 
10
11
  class WhiteSpace < Regexp::Expression::FreeSpace
11
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
12
14
  text << exp.text
13
15
  end
14
16
  end
data/lib/regexp_parser/expression/classes/group.rb CHANGED
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def parts
5
- [text.dup, *expressions, ')']
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def parts
22
- if implicit?
23
- expressions
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -55,8 +40,6 @@ module Regexp::Expression
55
40
  class Capture < Group::Base
56
41
  attr_accessor :number, :number_at_level
57
42
  alias identifier number
58
-
59
- def capturing?; true end
60
43
  end
61
44
 
62
45
  class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
75
58
  end
76
59
 
77
60
  class Comment < Group::Base
78
- def parts
79
- [text.dup]
80
- end
81
-
82
- def comment?; true end
83
61
  end
84
62
  end
85
63
 
data/lib/regexp_parser/expression/classes/posix_class.rb CHANGED
@@ -5,7 +5,11 @@ module Regexp::Expression
5
5
  end
6
6
 
7
7
  def name
8
- token.to_s
8
+ text[/\w+/]
9
9
  end
10
10
  end
11
+
12
+ # alias for symmetry between token symbol and Expression class name
13
+ Posixclass = PosixClass
14
+ Nonposixclass = PosixClass
11
15
  end
data/lib/regexp_parser/expression/classes/unicode_property.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :property, one way or the other, in v3.0.0
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -11,7 +10,7 @@ module Regexp::Expression
11
10
  end
12
11
 
13
12
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
13
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
14
  end
16
15
  end
17
16
 
@@ -116,4 +115,8 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
118
+
119
+ # alias for symmetry between token symbol and Expression class name
120
+ Property = UnicodeProperty
121
+ Nonproperty = UnicodeProperty
119
122
  end # module Regexp::Expression
data/lib/regexp_parser/expression/methods/construct.rb CHANGED
@@ -25,11 +25,9 @@ module Regexp::Expression
25
25
  def token_class
26
26
  if self == Root || self < Sequence
27
27
  nil # no token class because these objects are Parser-generated
28
- # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
- elsif self == Alternation || self == CharacterType::Any
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
30
  Regexp::Syntax::Token::Meta
31
- elsif self <= EscapeSequence::Base
32
- Regexp::Syntax::Token::Escape
33
31
  else
34
32
  Regexp::Syntax::Token.const_get(name.split('::')[2])
35
33
  end
data/lib/regexp_parser/expression/methods/parts.rb ADDED
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
data/lib/regexp_parser/expression/methods/printing.rb ADDED
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
data/lib/regexp_parser/expression/methods/tests.rb CHANGED
@@ -95,12 +95,49 @@ module Regexp::Expression
95
95
  end
96
96
 
97
97
  # Deep-compare two expressions for equality.
98
+ #
99
+ # When changing the conditions, please make sure to update
100
+ # #pretty_print_instance_variables so that it includes all relevant values.
98
101
  def ==(other)
99
- other.class == self.class &&
100
- other.to_s == to_s &&
101
- other.options == options
102
+ self.class == other.class &&
103
+ text == other.text &&
104
+ quantifier == other.quantifier &&
105
+ options == other.options &&
106
+ (terminal? || expressions == other.expressions)
102
107
  end
103
108
  alias :=== :==
104
109
  alias :eql? :==
110
+
111
+ def optional?
112
+ quantified? && quantifier.min == 0
113
+ end
114
+
115
+ def quantified?
116
+ !quantifier.nil?
117
+ end
105
118
  end
119
+
120
+ Shared.class_eval { def terminal?; self.class.terminal? end }
121
+ Shared::ClassMethods.class_eval { def terminal?; true end }
122
+ Subexpression.instance_eval { def terminal?; false end }
123
+
124
+ Shared.class_eval { def capturing?; self.class.capturing? end }
125
+ Shared::ClassMethods.class_eval { def capturing?; false end }
126
+ Group::Capture.instance_eval { def capturing?; true end }
127
+
128
+ Shared.class_eval { def comment?; self.class.comment? end }
129
+ Shared::ClassMethods.class_eval { def comment?; false end }
130
+ Comment.instance_eval { def comment?; true end }
131
+ Group::Comment.instance_eval { def comment?; true end }
132
+
133
+ Shared.class_eval { def decorative?; self.class.decorative? end }
134
+ Shared::ClassMethods.class_eval { def decorative?; false end }
135
+ FreeSpace.instance_eval { def decorative?; true end }
136
+ Group::Comment.instance_eval { def decorative?; true end }
137
+
138
+ Shared.class_eval { def referential?; self.class.referential? end }
139
+ Shared::ClassMethods.class_eval { def referential?; false end }
140
+ Backreference::Base.instance_eval { def referential?; true end }
141
+ Conditional::Condition.instance_eval { def referential?; true end }
142
+ Conditional::Expression.instance_eval { def referential?; true end }
106
143
  end
data/lib/regexp_parser/expression/methods/traverse.rb CHANGED
@@ -1,6 +1,22 @@
1
1
  module Regexp::Expression
2
2
  class Subexpression < Regexp::Expression::Base
3
3
 
4
+ # Traverses the expression, passing each recursive child to the
5
+ # given block.
6
+ # If the block takes two arguments, the indices of the children within
7
+ # their parents are also passed to it.
8
+ def each_expression(include_self = false, &block)
9
+ return enum_for(__method__, include_self) unless block
10
+
11
+ if block.arity == 1
12
+ block.call(self) if include_self
13
+ each_expression_without_index(&block)
14
+ else
15
+ block.call(self, 0) if include_self
16
+ each_expression_with_index(&block)
17
+ end
18
+ end
19
+
4
20
  # Traverses the subexpression (depth-first, pre-order) and calls the given
5
21
  # block for each expression with three arguments; the traversal event,
6
22
  # the expression, and the index of the expression within its parent.
@@ -34,31 +50,31 @@ module Regexp::Expression
34
50
  end
35
51
  alias :walk :traverse
36
52
 
37
- # Iterates over the expressions of this expression as an array, passing
38
- # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false)
40
- return enum_for(__method__, include_self) unless block_given?
41
-
42
- traverse(include_self) do |event, exp, index|
43
- yield(exp, index) unless event == :exit
44
- end
45
- end
46
-
47
53
  # Returns a new array with the results of calling the given block once
48
54
  # for every expression. If a block is not given, returns an array with
49
55
  # each expression and its level index as an array.
50
- def flat_map(include_self = false)
51
- result = []
56
+ def flat_map(include_self = false, &block)
57
+ case block && block.arity
58
+ when nil then each_expression(include_self).to_a
59
+ when 2 then each_expression(include_self).map(&block)
60
+ else each_expression(include_self).map { |exp| block.call(exp) }
61
+ end
62
+ end
52
63
 
53
- each_expression(include_self) do |exp, index|
54
- if block_given?
55
- result << yield(exp, index)
56
- else
57
- result << [exp, index]
58
- end
64
+ protected
65
+
66
+ def each_expression_with_index(&block)
67
+ each_with_index do |exp, index|
68
+ block.call(exp, index)
69
+ exp.each_expression_with_index(&block) unless exp.terminal?
59
70
  end
71
+ end
60
72
 
61
- result
73
+ def each_expression_without_index(&block)
74
+ each do |exp|
75
+ block.call(exp)
76
+ exp.each_expression_without_index(&block) unless exp.terminal?
77
+ end
62
78
  end
63
79
  end
64
80
  end