regexp_parser 2.7.0 → 2.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +62 -3
  3. data/Gemfile +3 -3
  4. data/LICENSE +1 -1
  5. data/README.md +33 -30
  6. data/lib/regexp_parser/expression/base.rb +0 -7
  7. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  8. data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
  9. data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
  10. data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
  11. data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
  12. data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
  13. data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
  14. data/lib/regexp_parser/expression/classes/group.rb +0 -22
  15. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  16. data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
  17. data/lib/regexp_parser/expression/methods/construct.rb +2 -4
  18. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  19. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  20. data/lib/regexp_parser/expression/methods/tests.rb +40 -3
  21. data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
  22. data/lib/regexp_parser/expression/quantifier.rb +30 -17
  23. data/lib/regexp_parser/expression/sequence.rb +5 -9
  24. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  25. data/lib/regexp_parser/expression/shared.rb +37 -24
  26. data/lib/regexp_parser/expression/subexpression.rb +20 -18
  27. data/lib/regexp_parser/expression.rb +2 -0
  28. data/lib/regexp_parser/lexer.rb +15 -7
  29. data/lib/regexp_parser/parser.rb +85 -86
  30. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  31. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  32. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  33. data/lib/regexp_parser/scanner/properties/long.csv +11 -0
  34. data/lib/regexp_parser/scanner/properties/short.csv +2 -0
  35. data/lib/regexp_parser/scanner/property.rl +1 -1
  36. data/lib/regexp_parser/scanner/scanner.rl +35 -129
  37. data/lib/regexp_parser/scanner.rb +1084 -1303
  38. data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
  39. data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
  40. data/lib/regexp_parser/syntax/token/escape.rb +3 -1
  41. data/lib/regexp_parser/syntax/token/meta.rb +9 -2
  42. data/lib/regexp_parser/syntax/token/unicode_property.rb +17 -1
  43. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  44. data/lib/regexp_parser/version.rb +1 -1
  45. metadata +9 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04af46818e9d560362fea9b3fd24802b557ac145ed95f6e02580dd7cf5e8ddfc
4
- data.tar.gz: 75b7d30241f48ddf90c8cd68228fa928904ab6055ea755f4bdcf28361e645a4b
3
+ metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
4
+ data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
5
5
  SHA512:
6
- metadata.gz: 407025a9b14af76463260fca2a48f9fef4ab863e3dddf3f7f54101c1348611afa49d9973e850d9e1c84d6e5faf8f1a9d3d2da5dceaefe8dc4fefe7069ecd9280
7
- data.tar.gz: 9f3d2eb4264318511a82e9034c4c4a8a8e73e67e427945f0c9f745fd37b2f2f0ae8e30ba942f0920da3109b59436a5518dfc5e2f7669317de0214a0deb6f0e07
6
+ metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
7
+ data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1
data/CHANGELOG.md CHANGED
@@ -5,14 +5,73 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [Unreleased]
9
+
10
+ ## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
11
+
12
+ ### Fixed
13
+
14
+ - support for extpict unicode property, added in Ruby 2.6
15
+ - support for 10 unicode script/block properties added in Ruby 3.2
16
+
17
+ ## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
18
+
19
+ ### Added
20
+
21
+ - `Regexp::Expression::Shared#ends_at`
22
+ * e.g. `parse(/a +/x)[0].ends_at # => 3`
23
+ * e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
24
+ - `Regexp::Expression::Shared#{capturing?,comment?}`
25
+ * previously only available on capturing and comment groups
26
+ - `Regexp::Expression::Shared#{decorative?}`
27
+ * true for decorations: comment groups as well as comments and whitespace in x-mode
28
+ - `Regexp::Expression::Shared#parent`
29
+ - new format argument `:original` for `Regexp::Expression::Base#to_s`
30
+ * includes decorative elements between node and its quantifier
31
+ * e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
32
+ * using it is not needed when calling `Root#to_s` as Root can't be quantified
33
+ - support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
34
+ * in this case, only the expressions are passed to the block, no indices
35
+ - support calling test methods at Expression class level
36
+ - `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
37
+ - e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
38
+
39
+ ### Fixed
40
+
41
+ - `Regexp::Expression::Shared#full_length` with whitespace before quantifier
42
+ * e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
43
+ - `Subexpression#to_s` output with children with whitespace before their quantifier
44
+ * e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
45
+ * calling `#to_s` on sub-nodes still omits such decorative interludes by default
46
+ - use new `#to_s` format `:original` to include it
47
+ - e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
48
+ - fixed `Subexpression#te` behaving differently from other expressions
49
+ * only `Subexpression#te` used to include the quantifier
50
+ * now `#te` is the end index without quantifier, as for other expressions
51
+ - fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
52
+ * e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
53
+ * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
54
+ - fixed nested comment groups breaking local x-options
55
+ * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
56
+ - fixed nested comment groups breaking conditionals
57
+ * e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
58
+ - fixed quantifiers after comment groups being mis-assigned to that group
59
+ * e.g. in `/a(?#foo){3}/` (matches 'aaa')
60
+ - fixed Scanner accepting two cases of invalid Regexp syntax
61
+ * unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
62
+ * these are a `SyntaxError` in Ruby, so could only be passed as a String
63
+ * they now raise a `Regexp::Scanner::ScannerError`
64
+ - fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
65
+ - reduced verbosity of inspect / pretty print output
66
+
8
67
  ## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
9
68
 
10
69
  ### Added
11
70
 
12
71
  - `Regexp::Lexer.lex` now streams tokens when called with a block
13
- - it can now take arbitrarily large input, just like `Regexp::Scanner`
14
- - this also slightly improves `Regexp::Parser.parse` performance
15
- - note: `Regexp::Parser.parse` still does not and will not support streaming
72
+ * it can now take arbitrarily large input, just like `Regexp::Scanner`
73
+ * this also slightly improves `Regexp::Parser.parse` performance
74
+ * note: `Regexp::Parser.parse` still does not and will not support streaming
16
75
  - improved performance of `Subexpression#each_expression`
17
76
  - minor improvements to `Regexp::Scanner` performance
18
77
  - overall improvement of parse performance: about 10% for large Regexps
data/Gemfile CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
- gem 'ice_nine', '~> 0.11.2'
6
+ gem 'leto', '~> 2.0'
7
7
  gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.3'
8
+ gem 'regexp_property_values', '~> 1.4'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'benchmark-ips', '~> 2.1'
12
- gem 'gouteur'
12
+ gem 'gouteur', '~> 1.1'
13
13
  gem 'rubocop', '~> 1.7'
14
14
  end
15
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2022, Ammar Ali
1
+ Copyright (c) 2010, 2012-2023, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/README.md CHANGED
@@ -67,7 +67,7 @@ called with the results as follows:
67
67
  * **Scanner**: the block gets passed the results as they are scanned. See the
68
68
  example in the next section for details.
69
69
 
70
- * **Lexer**: after completion, the block gets passed the tokens one by one.
70
+ * **Lexer**: the block gets passed the tokens one by one as they are scanned.
71
71
  _The result of the block is returned._
72
72
 
73
73
  * **Parser**: after completion, the block gets passed the root expression.
@@ -126,7 +126,7 @@ parts of the pattern:
126
126
 
127
127
  ```ruby
128
128
  Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
129
- #=> ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
129
+ # => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
130
130
  ```
131
131
 
132
132
 
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
248
248
 
249
249
  ```ruby
250
250
  Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
251
- #=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
251
+ # => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
252
252
  ```
253
253
 
254
254
  #### Notes
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
262
262
  ### Parser
263
263
  Sits on top of the lexer and transforms the "stream" of Token objects emitted
264
264
  by it into a tree of Expression objects represented by an instance of the
265
- Expression::Root class.
265
+ `Expression::Root` class.
266
266
 
267
267
  See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
268
268
  wiki page for attributes and methods.
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
270
270
 
271
271
  #### Example
272
272
 
273
+ This example uses the tree traversal method `#each_expression`
274
+ and the method `#strfregexp` to print each object in the tree.
275
+
276
+ ```ruby
277
+ include_root = true
278
+ indent_offset = include_root ? 1 : 0
279
+
280
+ tree.each_expression(include_root) do |exp|
281
+ puts exp.strfregexp("%>> %c", indent_offset)
282
+ end
283
+
284
+ # Output
285
+ # > Regexp::Expression::Root
286
+ # > Regexp::Expression::Literal
287
+ # > Regexp::Expression::Group::Capture
288
+ # > Regexp::Expression::Literal
289
+ # > Regexp::Expression::Group::Capture
290
+ # > Regexp::Expression::Literal
291
+ # > Regexp::Expression::Literal
292
+ # > Regexp::Expression::Group::Named
293
+ # > Regexp::Expression::CharacterSet
294
+ ```
295
+
296
+ _Note: quantifiers do not appear in the output because they are members of the
297
+ Expression class. See the next section for details._
298
+
299
+ Another example, using `#traverse` for a more fine-grained tree traversal:
300
+
273
301
  ```ruby
274
302
  require 'regexp_parser'
275
303
 
@@ -295,34 +323,9 @@ end
295
323
  # exit: group `(?<name>[0-9]+)`
296
324
  ```
297
325
 
298
- Another example, using each_expression and strfregexp to print the object tree.
299
326
  _See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
300
327
  for more information on these methods._
301
328
 
302
- ```ruby
303
- include_root = true
304
- indent_offset = include_root ? 1 : 0
305
-
306
- tree.each_expression(include_root) do |exp, level_index|
307
- puts exp.strfregexp("%>> %c", indent_offset)
308
- end
309
-
310
- # Output
311
- # > Regexp::Expression::Root
312
- # > Regexp::Expression::Literal
313
- # > Regexp::Expression::Group::Capture
314
- # > Regexp::Expression::Literal
315
- # > Regexp::Expression::Group::Capture
316
- # > Regexp::Expression::Literal
317
- # > Regexp::Expression::Literal
318
- # > Regexp::Expression::Group::Named
319
- # > Regexp::Expression::CharacterSet
320
- ```
321
-
322
- _Note: quantifiers do not appear in the output because they are members of the
323
- Expression class. See the next section for details._
324
-
325
-
326
329
  ---
327
330
 
328
331
 
@@ -500,4 +503,4 @@ Documentation and books used while working on this project.
500
503
 
501
504
  ---
502
505
  ##### Copyright
503
- _Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
506
+ _Copyright (c) 2010-2023 Ammar Ali. See LICENSE file for details._
data/lib/regexp_parser/expression/base.rb CHANGED
@@ -6,13 +6,6 @@ module Regexp::Expression
6
6
  init_from_token_and_options(token, options)
7
7
  end
8
8
 
9
- def initialize_copy(orig)
10
- self.text = orig.text.dup if orig.text
11
- self.options = orig.options.dup if orig.options
12
- self.quantifier = orig.quantifier.clone if orig.quantifier
13
- super
14
- end
15
-
16
9
  def to_re(format = :full)
17
10
  if set_level > 0
18
11
  warn "Calling #to_re on character set members is deprecated - "\
data/lib/regexp_parser/expression/classes/alternation.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Regexp::Expression
2
- # A sequence of expressions, used by Alternation as one of its alternative.
2
+ # A sequence of expressions, used by Alternation as one of its alternatives.
3
3
  class Alternative < Regexp::Expression::Sequence; end
4
4
 
5
5
  class Alternation < Regexp::Expression::SequenceOperation
data/lib/regexp_parser/expression/classes/backreference.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :backref, one way or the other, in v3.0.0
3
2
  module Backreference
4
3
  class Base < Regexp::Expression::Base
5
4
  attr_accessor :referenced_expression
@@ -20,10 +19,6 @@ module Regexp::Expression
20
19
 
21
20
  super
22
21
  end
23
-
24
- def referential?
25
- true
26
- end
27
22
  end
28
23
 
29
24
  class Number < Backreference::Base
@@ -31,7 +26,7 @@ module Regexp::Expression
31
26
  alias reference number
32
27
 
33
28
  def initialize(token, options = {})
34
- @number = token.text[token.token.equal?(:number) ? 1..-1 : 3..-2].to_i
29
+ @number = token.text[/-?\d+/].to_i
35
30
  super
36
31
  end
37
32
  end
@@ -74,4 +69,7 @@ module Regexp::Expression
74
69
  end
75
70
  end
76
71
  end
72
+
73
+ # alias for symmetry between token symbol and Expression class name
74
+ Backref = Backreference
77
75
  end
data/lib/regexp_parser/expression/classes/character_set/range.rb CHANGED
@@ -1,10 +1,9 @@
1
1
  module Regexp::Expression
2
2
  class CharacterSet < Regexp::Expression::Subexpression
3
3
  class Range < Regexp::Expression::Subexpression
4
- def starts_at
5
- expressions.first.starts_at
4
+ def ts
5
+ (head = expressions.first) ? head.ts : @ts
6
6
  end
7
- alias :ts :starts_at
8
7
 
9
8
  def <<(exp)
10
9
  complete? and raise Regexp::Parser::Error,
@@ -15,10 +14,6 @@ module Regexp::Expression
15
14
  def complete?
16
15
  count == 2
17
16
  end
18
-
19
- def parts
20
- intersperse(expressions, text.dup)
21
- end
22
17
  end
23
18
  end
24
19
  end
data/lib/regexp_parser/expression/classes/character_set.rb CHANGED
@@ -19,9 +19,8 @@ module Regexp::Expression
19
19
  def close
20
20
  self.closed = true
21
21
  end
22
-
23
- def parts
24
- ["#{text}#{'^' if negated?}", *expressions, ']']
25
- end
26
22
  end
23
+
24
+ # alias for symmetry between token symbol and Expression class name
25
+ Set = CharacterSet
27
26
  end # module Regexp::Expression
data/lib/regexp_parser/expression/classes/conditional.rb CHANGED
@@ -20,10 +20,6 @@ module Regexp::Expression
20
20
  self.referenced_expression = orig.referenced_expression.dup
21
21
  super
22
22
  end
23
-
24
- def referential?
25
- true
26
- end
27
23
  end
28
24
 
29
25
  class Branch < Regexp::Expression::Sequence; end
@@ -35,9 +31,9 @@ module Regexp::Expression
35
31
  expressions.last << exp
36
32
  end
37
33
 
38
- def add_sequence(active_opts = {})
34
+ def add_sequence(active_opts = {}, params = { ts: 0 })
39
35
  raise TooManyBranches.new if branches.length == 2
40
- params = { conditional_level: conditional_level + 1 }
36
+ params = params.merge({ conditional_level: conditional_level + 1 })
41
37
  Branch.add_to(self, params, active_opts)
42
38
  end
43
39
  alias :branch :add_sequence
@@ -59,14 +55,6 @@ module Regexp::Expression
59
55
  condition.reference
60
56
  end
61
57
 
62
- def referential?
63
- true
64
- end
65
-
66
- def parts
67
- [text.dup, condition, *intersperse(branches, '|'), ')']
68
- end
69
-
70
58
  def initialize_copy(orig)
71
59
  self.referenced_expression = orig.referenced_expression.dup
72
60
  super
data/lib/regexp_parser/expression/classes/escape_sequence.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
3
2
  module EscapeSequence
4
3
  class Base < Regexp::Expression::Base
5
4
  def codepoint
@@ -97,4 +96,7 @@ module Regexp::Expression
97
96
  end
98
97
  end
99
98
  end
99
+
100
+ # alias for symmetry between Token::* and Expression::*
101
+ Escape = EscapeSequence
100
102
  end
data/lib/regexp_parser/expression/classes/free_space.rb CHANGED
@@ -5,10 +5,12 @@ module Regexp::Expression
5
5
  end
6
6
  end
7
7
 
8
- class Comment < Regexp::Expression::FreeSpace; end
8
+ class Comment < Regexp::Expression::FreeSpace
9
+ end
9
10
 
10
11
  class WhiteSpace < Regexp::Expression::FreeSpace
11
12
  def merge(exp)
13
+ warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
12
14
  text << exp.text
13
15
  end
14
16
  end
data/lib/regexp_parser/expression/classes/group.rb CHANGED
@@ -1,13 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Group
3
3
  class Base < Regexp::Expression::Subexpression
4
- def parts
5
- [text.dup, *expressions, ')']
6
- end
7
-
8
- def capturing?; false end
9
-
10
- def comment?; false end
11
4
  end
12
5
 
13
6
  class Passive < Group::Base
@@ -18,14 +11,6 @@ module Regexp::Expression
18
11
  super
19
12
  end
20
13
 
21
- def parts
22
- if implicit?
23
- expressions
24
- else
25
- super
26
- end
27
- end
28
-
29
14
  def implicit?
30
15
  @implicit
31
16
  end
@@ -55,8 +40,6 @@ module Regexp::Expression
55
40
  class Capture < Group::Base
56
41
  attr_accessor :number, :number_at_level
57
42
  alias identifier number
58
-
59
- def capturing?; true end
60
43
  end
61
44
 
62
45
  class Named < Group::Capture
@@ -75,11 +58,6 @@ module Regexp::Expression
75
58
  end
76
59
 
77
60
  class Comment < Group::Base
78
- def parts
79
- [text.dup]
80
- end
81
-
82
- def comment?; true end
83
61
  end
84
62
  end
85
63
 
data/lib/regexp_parser/expression/classes/posix_class.rb CHANGED
@@ -5,7 +5,11 @@ module Regexp::Expression
5
5
  end
6
6
 
7
7
  def name
8
- token.to_s
8
+ text[/\w+/]
9
9
  end
10
10
  end
11
+
12
+ # alias for symmetry between token symbol and Expression class name
13
+ Posixclass = PosixClass
14
+ Nonposixclass = PosixClass
11
15
  end
data/lib/regexp_parser/expression/classes/unicode_property.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
- # TODO: unify name with token :property, one way or the other, in v3.0.0
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -11,7 +10,7 @@ module Regexp::Expression
11
10
  end
12
11
 
13
12
  def shortcut
14
- (Regexp::Scanner.short_prop_map.rassoc(token.to_s) || []).first
13
+ Regexp::Scanner.short_prop_map.key(token.to_s)
15
14
  end
16
15
  end
17
16
 
@@ -116,4 +115,8 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
118
+
119
+ # alias for symmetry between token symbol and Expression class name
120
+ Property = UnicodeProperty
121
+ Nonproperty = UnicodeProperty
119
122
  end # module Regexp::Expression
data/lib/regexp_parser/expression/methods/construct.rb CHANGED
@@ -25,11 +25,9 @@ module Regexp::Expression
25
25
  def token_class
26
26
  if self == Root || self < Sequence
27
27
  nil # no token class because these objects are Parser-generated
28
- # TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
29
- elsif self == Alternation || self == CharacterType::Any
28
+ # TODO: synch exp class, token class & type names for this in v3.0.0
29
+ elsif self == CharacterType::Any
30
30
  Regexp::Syntax::Token::Meta
31
- elsif self <= EscapeSequence::Base
32
- Regexp::Syntax::Token::Escape
33
31
  else
34
32
  Regexp::Syntax::Token.const_get(name.split('::')[2])
35
33
  end
data/lib/regexp_parser/expression/methods/parts.rb ADDED
@@ -0,0 +1,23 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ # default implementation
4
+ def parts
5
+ [text.dup]
6
+ end
7
+
8
+ private
9
+
10
+ def intersperse(expressions, separator)
11
+ expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
12
+ end
13
+ end
14
+
15
+ CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
16
+ CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
17
+ Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
18
+ Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
19
+ Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
20
+ Group::Comment.class_eval { def parts; [text.dup] end }
21
+ Subexpression.class_eval { def parts; expressions end }
22
+ SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
23
+ end
data/lib/regexp_parser/expression/methods/printing.rb ADDED
@@ -0,0 +1,26 @@
1
+ module Regexp::Expression
2
+ module Shared
3
+ def inspect
4
+ [
5
+ "#<#{self.class}",
6
+ pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
7
+ ">"
8
+ ].join
9
+ end
10
+
11
+ # Make pretty-print work despite #inspect implementation.
12
+ def pretty_print(q)
13
+ q.pp_object(self)
14
+ end
15
+
16
+ # Called by pretty_print (ruby/pp) and #inspect.
17
+ def pretty_print_instance_variables
18
+ [
19
+ (:@text unless text.to_s.empty?),
20
+ (:@quantifier if quantified?),
21
+ (:@options unless options.empty?),
22
+ (:@expressions unless terminal?),
23
+ ].compact
24
+ end
25
+ end
26
+ end
data/lib/regexp_parser/expression/methods/tests.rb CHANGED
@@ -95,12 +95,49 @@ module Regexp::Expression
95
95
  end
96
96
 
97
97
  # Deep-compare two expressions for equality.
98
+ #
99
+ # When changing the conditions, please make sure to update
100
+ # #pretty_print_instance_variables so that it includes all relevant values.
98
101
  def ==(other)
99
- other.class == self.class &&
100
- other.to_s == to_s &&
101
- other.options == options
102
+ self.class == other.class &&
103
+ text == other.text &&
104
+ quantifier == other.quantifier &&
105
+ options == other.options &&
106
+ (terminal? || expressions == other.expressions)
102
107
  end
103
108
  alias :=== :==
104
109
  alias :eql? :==
110
+
111
+ def optional?
112
+ quantified? && quantifier.min == 0
113
+ end
114
+
115
+ def quantified?
116
+ !quantifier.nil?
117
+ end
105
118
  end
119
+
120
+ Shared.class_eval { def terminal?; self.class.terminal? end }
121
+ Shared::ClassMethods.class_eval { def terminal?; true end }
122
+ Subexpression.instance_eval { def terminal?; false end }
123
+
124
+ Shared.class_eval { def capturing?; self.class.capturing? end }
125
+ Shared::ClassMethods.class_eval { def capturing?; false end }
126
+ Group::Capture.instance_eval { def capturing?; true end }
127
+
128
+ Shared.class_eval { def comment?; self.class.comment? end }
129
+ Shared::ClassMethods.class_eval { def comment?; false end }
130
+ Comment.instance_eval { def comment?; true end }
131
+ Group::Comment.instance_eval { def comment?; true end }
132
+
133
+ Shared.class_eval { def decorative?; self.class.decorative? end }
134
+ Shared::ClassMethods.class_eval { def decorative?; false end }
135
+ FreeSpace.instance_eval { def decorative?; true end }
136
+ Group::Comment.instance_eval { def decorative?; true end }
137
+
138
+ Shared.class_eval { def referential?; self.class.referential? end }
139
+ Shared::ClassMethods.class_eval { def referential?; false end }
140
+ Backreference::Base.instance_eval { def referential?; true end }
141
+ Conditional::Condition.instance_eval { def referential?; true end }
142
+ Conditional::Expression.instance_eval { def referential?; true end }
106
143
  end
data/lib/regexp_parser/expression/methods/traverse.rb CHANGED
@@ -1,6 +1,22 @@
1
1
  module Regexp::Expression
2
2
  class Subexpression < Regexp::Expression::Base
3
3
 
4
+ # Traverses the expression, passing each recursive child to the
5
+ # given block.
6
+ # If the block takes two arguments, the indices of the children within
7
+ # their parents are also passed to it.
8
+ def each_expression(include_self = false, &block)
9
+ return enum_for(__method__, include_self) unless block
10
+
11
+ if block.arity == 1
12
+ block.call(self) if include_self
13
+ each_expression_without_index(&block)
14
+ else
15
+ block.call(self, 0) if include_self
16
+ each_expression_with_index(&block)
17
+ end
18
+ end
19
+
4
20
  # Traverses the subexpression (depth-first, pre-order) and calls the given
5
21
  # block for each expression with three arguments; the traversal event,
6
22
  # the expression, and the index of the expression within its parent.
@@ -34,34 +50,31 @@ module Regexp::Expression
34
50
  end
35
51
  alias :walk :traverse
36
52
 
37
- # Iterates over the expressions of this expression as an array, passing
38
- # the expression and its index within its parent to the given block.
39
- def each_expression(include_self = false, &block)
40
- return enum_for(__method__, include_self) unless block_given?
53
+ # Returns a new array with the results of calling the given block once
54
+ # for every expression. If a block is not given, returns an array with
55
+ # each expression and its level index as an array.
56
+ def flat_map(include_self = false, &block)
57
+ case block && block.arity
58
+ when nil then each_expression(include_self).to_a
59
+ when 2 then each_expression(include_self).map(&block)
60
+ else each_expression(include_self).map { |exp| block.call(exp) }
61
+ end
62
+ end
41
63
 
42
- block.call(self, 0) if include_self
64
+ protected
43
65
 
66
+ def each_expression_with_index(&block)
44
67
  each_with_index do |exp, index|
45
68
  block.call(exp, index)
46
- exp.each_expression(&block) unless exp.terminal?
69
+ exp.each_expression_with_index(&block) unless exp.terminal?
47
70
  end
48
71
  end
49
72
 
50
- # Returns a new array with the results of calling the given block once
51
- # for every expression. If a block is not given, returns an array with
52
- # each expression and its level index as an array.
53
- def flat_map(include_self = false)
54
- result = []
55
-
56
- each_expression(include_self) do |exp, index|
57
- if block_given?
58
- result << yield(exp, index)
59
- else
60
- result << [exp, index]
61
- end
73
+ def each_expression_without_index(&block)
74
+ each do |exp|
75
+ block.call(exp)
76
+ exp.each_expression_without_index(&block) unless exp.terminal?
62
77
  end
63
-
64
- result
65
78
  end
66
79
  end
67
80
  end