regexp_parser 2.7.0 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +55 -3
  3. data/Gemfile +2 -2
  4. data/README.md +32 -29
  5. data/lib/regexp_parser/expression/base.rb +0 -7
  6. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  7. data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
  8. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  9. data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
  10. data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
  11. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  12. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  13. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  14. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  15. data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
  16. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  17. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  18. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  19. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  20. data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
  21. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  22. data/lib/regexp_parser/expression/sequence.rb +5 -9
  23. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  24. data/lib/regexp_parser/expression/shared.rb +37 -24
  25. data/lib/regexp_parser/expression/subexpression.rb +20 -18
  26. data/lib/regexp_parser/expression.rb +2 -0
  27. data/lib/regexp_parser/lexer.rb +15 -7
  28. data/lib/regexp_parser/parser.rb +85 -86
  29. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  30. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  31. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  32. data/lib/regexp_parser/scanner/mapping.rb +89 -0
  33. data/lib/regexp_parser/scanner/property.rl +1 -1
  34. data/lib/regexp_parser/scanner/scanner.rl +35 -129
  35. data/lib/regexp_parser/scanner.rb +1084 -1303
  36. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  37. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  38. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  39. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  40. data/lib/regexp_parser/syntax/token/unicode_property.rb +3 -0
  41. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  42. data/lib/regexp_parser/version.rb +1 -1
  43. metadata +9 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04af46818e9d560362fea9b3fd24802b557ac145ed95f6e02580dd7cf5e8ddfc
4
- data.tar.gz: 75b7d30241f48ddf90c8cd68228fa928904ab6055ea755f4bdcf28361e645a4b
3
+ metadata.gz: bed928e92928d8f595241456658e516f3afd2474196ca4d6fdbb849c072d5024
4
+ data.tar.gz: 48d50057af6883cd2d67050fc05aed79e87342f6067eb80734729a8440c08a69
5
5
  SHA512:
6
- metadata.gz: 407025a9b14af76463260fca2a48f9fef4ab863e3dddf3f7f54101c1348611afa49d9973e850d9e1c84d6e5faf8f1a9d3d2da5dceaefe8dc4fefe7069ecd9280
7
- data.tar.gz: 9f3d2eb4264318511a82e9034c4c4a8a8e73e67e427945f0c9f745fd37b2f2f0ae8e30ba942f0920da3109b59436a5518dfc5e2f7669317de0214a0deb6f0e07
6
+ metadata.gz: 455e79dd780d7d5c130fae56140158615195601f68ea9eb83367d0b9faaf631586bbf12f5b9243d16bb42d29eeb57ba595f87a3b4604b32af059dc9a72c4d6d4
7
+ data.tar.gz: 37216a681eda06118b7317e64cab14cb06e39e4923433225598b60b8b36684ab831e4d90960516adbfdaa16811b274c2181eb38a13ddd259fb6790cbeef99ebf
data/CHANGELOG.md CHANGED
@@ -5,14 +5,66 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [Unreleased]
9
+
10
+ ## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
11
+
12
+ ### Added
13
+
14
+ - `Regexp::Expression::Shared#ends_at`
15
+ * e.g. `parse(/a +/x)[0].ends_at # => 3`
16
+ * e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
17
+ - `Regexp::Expression::Shared#{capturing?,comment?}`
18
+ * previously only available on capturing and comment groups
19
+ - `Regexp::Expression::Shared#{decorative?}`
20
+ * true for decorations: comment groups as well as comments and whitespace in x-mode
21
+ - `Regexp::Expression::Shared#parent`
22
+ - new format argument `:original` for `Regexp::Expression::Base#to_s`
23
+ * includes decorative elements between node and its quantifier
24
+ * e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
25
+ * using it is not needed when calling `Root#to_s` as Root can't be quantified
26
+ - support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
27
+ * in this case, only the expressions are passed to the block, no indices
28
+ - support calling test methods at Expression class level
29
+ - `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
30
+ - e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
31
+
32
+ ### Fixed
33
+
34
+ - `Regexp::Expression::Shared#full_length` with whitespace before quantifier
35
+ * e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
36
+ - `Subexpression#to_s` output with children with whitespace before their quantifier
37
+ * e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
38
+ * calling `#to_s` on sub-nodes still omits such decorative interludes by default
39
+ - use new `#to_s` format `:original` to include them
40
+ - e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
41
+ - fixed `Subexpression#te` behaving differently from other expressions
42
+ * only `Subexpression#te` used to include the quantifier
43
+ * now `#te` is the end index without quantifier, as for other expressions
44
+ - fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
45
+ * e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
46
+ * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
47
+ - fixed nested comment groups breaking local x-options
48
+ * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
49
+ - fixed nested comment groups breaking conditionals
50
+ * e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
51
+ - fixed quantifiers after comment groups being mis-assigned to that group
52
+ * e.g. in `/a(?#foo){3}/` (matches 'aaa')
53
+ - fixed Scanner accepting two cases of invalid Regexp syntax
54
+ * unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
55
+ * these cause a `SyntaxError` in Ruby, so they could only be passed as a String
56
+ * they now raise a `Regexp::Scanner::ScannerError`
57
+ - fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
58
+ - reduced verbosity of inspect / pretty print output
59
+
8
60
  ## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
9
61
 
10
62
  ### Added
11
63
 
12
64
  - `Regexp::Lexer.lex` now streams tokens when called with a block
13
- - it can now take arbitrarily large input, just like `Regexp::Scanner`
14
- - this also slightly improves `Regexp::Parser.parse` performance
15
- - note: `Regexp::Parser.parse` still does not and will not support streaming
65
+ * it can now take arbitrarily large input, just like `Regexp::Scanner`
66
+ * this also slightly improves `Regexp::Parser.parse` performance
67
+ * note: `Regexp::Parser.parse` still does not and will not support streaming
16
68
  - improved performance of `Subexpression#each_expression`
17
69
  - minor improvements to `Regexp::Scanner` performance
18
70
  - overall improvement of parse performance: about 10% for large Regexps
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
6
+ gem 'leto', '~> 2.0'
7
7
  gem 'rake', '~> 13.0'
8
8
  gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
- gem 'gouteur'
12
+ gem 'gouteur', '~> 1.1'
13
13
  gem 'rubocop', '~> 1.7'
14
14
  end
15
15
  end
data/README.md CHANGED
@@ -67,7 +67,7 @@ called with the results as follows:
67
67
  * **Scanner**: the block gets passed the results as they are scanned. See the
68
68
  example in the next section for details.
69
69
 
70
- * **Lexer**: after completion, the block gets passed the tokens one by one.
70
+ * **Lexer**: the block gets passed the tokens one by one as they are scanned.
71
71
  _The result of the block is returned._
72
72
 
73
73
  * **Parser**: after completion, the block gets passed the root expression.
@@ -126,7 +126,7 @@ parts of the pattern:
126
126
 
127
127
  ```ruby
128
128
  Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
129
- #=> ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
129
+ # => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
130
130
  ```
131
131
 
132
132
 
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
248
248
 
249
249
  ```ruby
250
250
  Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
251
- #=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
251
+ # => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
252
252
  ```
253
253
 
254
254
  #### Notes
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
262
262
  ### Parser
263
263
  Sits on top of the lexer and transforms the "stream" of Token objects emitted
264
264
  by it into a tree of Expression objects represented by an instance of the
265
- Expression::Root class.
265
+ `Expression::Root` class.
266
266
 
267
267
  See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
268
268
  wiki page for attributes and methods.
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
270
270
 
271
271
  #### Example
272
272
 
273
+ This example uses the tree traversal method `#each_expression`
274
+ and the method `#strfregexp` to print each object in the tree.
275
+
276
+ ```ruby
277
+ include_root = true
278
+ indent_offset = include_root ? 1 : 0
279
+
280
+ tree.each_expression(include_root) do |exp|
281
+ puts exp.strfregexp("%>> %c", indent_offset)
282
+ end
283
+
284
+ # Output
285
+ # > Regexp::Expression::Root
286
+ # > Regexp::Expression::Literal
287
+ # > Regexp::Expression::Group::Capture
288
+ # > Regexp::Expression::Literal
289
+ # > Regexp::Expression::Group::Capture
290
+ # > Regexp::Expression::Literal
291
+ # > Regexp::Expression::Literal
292
+ # > Regexp::Expression::Group::Named
293
+ # > Regexp::Expression::CharacterSet
294
+ ```
295
+
296
+ _Note: quantifiers do not appear in the output because they are attributes of the
297
+ expressions they apply to, not separate tree nodes. See the next section for details._
298
+
299
+ Another example, using `#traverse` for a more fine-grained tree traversal:
300
+
273
301
  ```ruby
274
302
  require 'regexp_parser'
275
303
 
@@ -295,34 +323,9 @@ end
295
323
  # exit: group `(?<name>[0-9]+)`
296
324
  ```
297
325
 
298
- Another example, using each_expression and strfregexp to print the object tree.
299
326
  _See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
300
327
  for more information on these methods._
301
328
 
302
- ```ruby
303
- include_root = true
304
- indent_offset = include_root ? 1 : 0
305
-
306
- tree.each_expression(include_root) do |exp, level_index|
307
- puts exp.strfregexp("%>> %c", indent_offset)
308
- end
309
-
310
- # Output
311
- # > Regexp::Expression::Root
312
- # > Regexp::Expression::Literal
313
- # > Regexp::Expression::Group::Capture
314
- # > Regexp::Expression::Literal
315
- # > Regexp::Expression::Group::Capture
316
- # > Regexp::Expression::Literal
317
- # > Regexp::Expression::Literal
318
- # > Regexp::Expression::Group::Named
319
- # > Regexp::Expression::CharacterSet
320
- ```
321
-
322
- _Note: quantifiers do not appear in the output because they are members of the
323
- Expression class. See the next section for details._
324
-
325
-
326
329
  ---
327
330
 
328
331
 
@@ -6,13 +6,6 @@ module Regexp::Expression
6
6
  init_from_token_and_options(token, options)
7
7
  end
8
8
 
9
- def initialize_copy(orig)
10
- self.text = orig.text.dup if orig.text
11
- self.options = orig.options.dup if orig.options
12
- self.quantifier = orig.quantifier.clone if orig.quantifier
13
- super
14
- end
15
-
16
9
  def to_re(format = :full)
17
10
  if set_level > 0
18
11
  warn "Calling #to_re on character set members is deprecated - "\
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :backref, one way or the other, in v3.0.0
3
2
  module Backreference
4
3
  class Base < Regexp::Expression::Base
5
4
  attr_accessor :referenced_expression
@@ -20,10 +19,6 @@ module Regexp::Expression
20
19
 
21
20
  super
22
21
  end
23
-
24
- def referential?
25
- true
26
- end
27
22
  end
28
23
 
29
24
  class Number < Backreference::Base
@@ -31,7 +26,7 @@ module Regexp::Expression
31
26
  alias reference number
32
27
 
33
28
  def initialize(token, options = {})
34
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
35
30
  super
36
31
  end
37
32
  end
@@ -74,4 +69,7 @@ module Regexp::Expression
74
69
  end
75
70
  end
76
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
77
75
  end
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def parts
20
- intersperse(expressions, text.dup)
21
- end
22
17
  end
23
18
  end
24
19
  end
@@ -19,9 +19,8 @@ module Regexp::Expression
19
19
  def close
20
20
  self.closed = true
21
21
  end
22
-
23
- def parts
24
- ["#{text}#{'^' if negated?}", *expressions, ']']
25
- end
26
22
  end
23
+
24
+ # alias for symmetry between token symbol and Expression class name
25
+ Set = CharacterSet
27
26
  end # module Regexp::Expression
@@ -20,10 +20,6 @@ module Regexp::Expression
20
20
  self.referenced_expression = orig.referenced_expression.dup
21
21
  super
22
22
  end
23
-
24
- def referential?
25
- true
26
- end
27
23
  end
28
24
 
29
25
  class Branch < Regexp::Expression::Sequence; end
@@ -35,9 +31,9 @@ module Regexp::Expression
35
31
  expressions.last << exp
36
32
  end
37
33
 
38
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
39
35
  raise TooManyBranches.new if branches.length == 2
40
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
41
37
  Branch.add_to(self, params, active_opts)
42
38
  end
43
39
  alias :branch :add_sequence
@@ -59,14 +55,6 @@ module Regexp::Expression
59
55
  condition.reference
60
56
  end
61
57
 
62
- def referential?
63
- true
64
- end
65
-
66
- def parts
67
- [text.dup, condition, *intersperse(branches, '|'), ')']
68
- end
69
-
70
58
  def initialize_copy(orig)
71
59
  self.referenced_expression = orig.referenced_expression.dup
72
60
  super
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
2
  module EscapeSequence
4
3
  class Base < Regexp::Expression::Base
5
4
  def codepoint
@@ -97,4 +96,7 @@ module Regexp::Expression
97
96
  end
98
97
  end
99
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
100
102
  end
@@ -5,10 +5,12 @@ module Regexp::Expression
5
5
  end
6
6
  end
7
7
 
8
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
9
10
 
10
11
  class WhiteSpace < Regexp::Expression::FreeSpace
11
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
12
14
  text << exp.text
13
15
  end
14
16
  end
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def parts
5
- [text.dup, *expressions, ')']
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def parts
22
- if implicit?
23
- expressions
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -55,8 +40,6 @@ module Regexp::Expression
55
40
  class Capture < Group::Base
56
41
  attr_accessor :number, :number_at_level
57
42
  alias identifier number
58
-
59
- def capturing?; true end
60
43
  end
61
44
 
62
45
  class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
75
58
  end
76
59
 
77
60
  class Comment < Group::Base
78
- def parts
79
- [text.dup]
80
- end
81
-
82
- def comment?; true end
83
61
  end
84
62
  end
85
63
 
@@ -5,7 +5,11 @@ module Regexp::Expression
5
5
  end
6
6
 
7
7
  def name
8
- token.to_s
8
+ text[/\w+/]
9
9
  end
10
10
  end
11
+
12
+ # aliases for symmetry between token symbols and Expression class name
13
+ Posixclass = PosixClass
14
+ Nonposixclass = PosixClass
11
15
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :property, one way or the other, in v3.0.0
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -11,7 +10,7 @@ module Regexp::Expression
11
10
  end
12
11
 
13
12
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
13
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
14
  end
16
15
  end
17
16
 
@@ -116,4 +115,8 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
118
+
119
+ # aliases for symmetry between token symbols and Expression class name
120
+ Property = UnicodeProperty
121
+ Nonproperty = UnicodeProperty
119
122
  end # module Regexp::Expression
@@ -25,11 +25,9 @@ module Regexp::Expression
25
25
  def token_class
26
26
  if self == Root || self < Sequence
27
27
  nil # no token class because these objects are Parser-generated
28
- # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
- elsif self == Alternation || self == CharacterType::Any
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
30
  Regexp::Syntax::Token::Meta
31
- elsif self <= EscapeSequence::Base
32
- Regexp::Syntax::Token::Escape
33
31
  else
34
32
  Regexp::Syntax::Token.const_get(name.split('::')[2])
35
33
  end
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
@@ -95,12 +95,49 @@ module Regexp::Expression
95
95
  end
96
96
 
97
97
  # Deep-compare two expressions for equality.
98
+ #
99
+ # When changing the conditions, please make sure to update
100
+ # #pretty_print_instance_variables so that it includes all relevant values.
98
101
  def ==(other)
99
- other.class == self.class &&
100
- other.to_s == to_s &&
101
- other.options == options
102
+ self.class == other.class &&
103
+ text == other.text &&
104
+ quantifier == other.quantifier &&
105
+ options == other.options &&
106
+ (terminal? || expressions == other.expressions)
102
107
  end
103
108
  alias :=== :==
104
109
  alias :eql? :==
110
+
111
+ def optional?
112
+ quantified? && quantifier.min == 0
113
+ end
114
+
115
+ def quantified?
116
+ !quantifier.nil?
117
+ end
105
118
  end
119
+
120
+ Shared.class_eval { def terminal?; self.class.terminal? end }
121
+ Shared::ClassMethods.class_eval { def terminal?; true end }
122
+ Subexpression.instance_eval { def terminal?; false end }
123
+
124
+ Shared.class_eval { def capturing?; self.class.capturing? end }
125
+ Shared::ClassMethods.class_eval { def capturing?; false end }
126
+ Group::Capture.instance_eval { def capturing?; true end }
127
+
128
+ Shared.class_eval { def comment?; self.class.comment? end }
129
+ Shared::ClassMethods.class_eval { def comment?; false end }
130
+ Comment.instance_eval { def comment?; true end }
131
+ Group::Comment.instance_eval { def comment?; true end }
132
+
133
+ Shared.class_eval { def decorative?; self.class.decorative? end }
134
+ Shared::ClassMethods.class_eval { def decorative?; false end }
135
+ FreeSpace.instance_eval { def decorative?; true end }
136
+ Group::Comment.instance_eval { def decorative?; true end }
137
+
138
+ Shared.class_eval { def referential?; self.class.referential? end }
139
+ Shared::ClassMethods.class_eval { def referential?; false end }
140
+ Backreference::Base.instance_eval { def referential?; true end }
141
+ Conditional::Condition.instance_eval { def referential?; true end }
142
+ Conditional::Expression.instance_eval { def referential?; true end }
106
143
  end
@@ -1,6 +1,22 @@
1
1
  module Regexp::Expression
2
2
  class Subexpression < Regexp::Expression::Base
3
3
 
4
+ # Traverses the expression, passing each recursive child to the
5
+ # given block.
6
+ # If the block takes two arguments, the indices of the children within
7
+ # their parents are also passed to it.
8
+ def each_expression(include_self = false, &block)
9
+ return enum_for(__method__, include_self) unless block
10
+
11
+ if block.arity == 1
12
+ block.call(self) if include_self
13
+ each_expression_without_index(&block)
14
+ else
15
+ block.call(self, 0) if include_self
16
+ each_expression_with_index(&block)
17
+ end
18
+ end
19
+
4
20
  # Traverses the subexpression (depth-first, pre-order) and calls the given
5
21
  # block for each expression with three arguments; the traversal event,
6
22
  # the expression, and the index of the expression within its parent.
@@ -34,34 +50,31 @@ module Regexp::Expression
34
50
  end
35
51
  alias :walk :traverse
36
52
 
37
- # Iterates over the expressions of this expression as an array, passing
38
- # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false, &block)
40
- return enum_for(__method__, include_self) unless block_given?
53
+ # Returns a new array with the results of calling the given block once
54
+ # for every expression. If a block is not given, returns an array with
55
+ # each expression and its level index as an array.
56
+ def flat_map(include_self = false, &block)
57
+ case block && block.arity
58
+ when nil then each_expression(include_self).to_a
59
+ when 2 then each_expression(include_self).map(&block)
60
+ else each_expression(include_self).map { |exp| block.call(exp) }
61
+ end
62
+ end
41
63
 
42
- block.call(self, 0) if include_self
64
+ protected
43
65
 
66
+ def each_expression_with_index(&block)
44
67
  each_with_index do |exp, index|
45
68
  block.call(exp, index)
46
- exp.each_expression(&block) unless exp.terminal?
69
+ exp.each_expression_with_index(&block) unless exp.terminal?
47
70
  end
48
71
  end
49
72
 
50
- # Returns a new array with the results of calling the given block once
51
- # for every expression. If a block is not given, returns an array with
52
- # each expression and its level index as an array.
53
- def flat_map(include_self = false)
54
- result = []
55
-
56
- each_expression(include_self) do |exp, index|
57
- if block_given?
58
- result << yield(exp, index)
59
- else
60
- result << [exp, index]
61
- end
73
+ def each_expression_without_index(&block)
74
+ each do |exp|
75
+ block.call(exp)
76
+ exp.each_expression_without_index(&block) unless exp.terminal?
62
77
  end
63
-
64
- result
65
78
  end
66
79
  end
67
80
  end