regexp_parser 2.6.2 → 2.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +67 -0
- data/Gemfile +2 -2
- data/README.md +32 -29
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -2
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -6
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -10
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -20
- data/lib/regexp_parser/expression/subexpression.rb +20 -15
- data/lib/regexp_parser/expression.rb +2 -0
- data/lib/regexp_parser/lexer.rb +76 -36
- data/lib/regexp_parser/parser.rb +97 -97
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/mapping.rb +89 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +90 -169
- data/lib/regexp_parser/scanner.rb +1157 -1330
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +3 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
- data/lib/regexp_parser/syntax/versions.rb +2 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +10 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bed928e92928d8f595241456658e516f3afd2474196ca4d6fdbb849c072d5024
|
4
|
+
data.tar.gz: 48d50057af6883cd2d67050fc05aed79e87342f6067eb80734729a8440c08a69
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 455e79dd780d7d5c130fae56140158615195601f68ea9eb83367d0b9faaf631586bbf12f5b9243d16bb42d29eeb57ba595f87a3b4604b32af059dc9a72c4d6d4
|
7
|
+
data.tar.gz: 37216a681eda06118b7317e64cab14cb06e39e4923433225598b60b8b36684ab831e4d90960516adbfdaa16811b274c2181eb38a13ddd259fb6790cbeef99ebf
|
data/CHANGELOG.md
CHANGED
@@ -7,6 +7,73 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
7
7
|
|
8
8
|
## [Unreleased]
|
9
9
|
|
10
|
+
## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- `Regexp::Expression::Shared#ends_at`
|
15
|
+
* e.g. `parse(/a +/x)[0].ends_at # => 3`
|
16
|
+
* e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
|
17
|
+
- `Regexp::Expression::Shared#{capturing?,comment?}`
|
18
|
+
* previously only available on capturing and comment groups
|
19
|
+
- `Regexp::Expression::Shared#{decorative?}`
|
20
|
+
* true for decorations: comment groups as well as comments and whitespace in x-mode
|
21
|
+
- `Regexp::Expression::Shared#parent`
|
22
|
+
- new format argument `:original` for `Regexp::Expression::Base#to_s`
|
23
|
+
* includes decorative elements between node and its quantifier
|
24
|
+
* e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
|
25
|
+
* using it is not needed when calling `Root#to_s` as Root can't be quantified
|
26
|
+
- support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
|
27
|
+
* in this case, only the expressions are passed to the block, no indices
|
28
|
+
- support calling test methods at Expression class level
|
29
|
+
- `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
|
30
|
+
- e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
|
31
|
+
|
32
|
+
### Fixed
|
33
|
+
|
34
|
+
- `Regexp::Expression::Shared#full_length` with whitespace before quantifier
|
35
|
+
* e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
|
36
|
+
- `Subexpression#to_s` output with children with whitespace before their quantifier
|
37
|
+
* e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
|
38
|
+
* calling `#to_s` on sub-nodes still omits such decorative interludes by default
|
39
|
+
- use new `#to_s` format `:original` to include it
|
40
|
+
- e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
|
41
|
+
- fixed `Subexpression#te` behaving differently from other expressions
|
42
|
+
* only `Subexpression#te` used to include the quantifier
|
43
|
+
* now `#te` is the end index without quantifier, as for other expressions
|
44
|
+
- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
|
45
|
+
* e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
|
46
|
+
* e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
|
47
|
+
- fixed nested comment groups breaking local x-options
|
48
|
+
* e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
|
49
|
+
- fixed nested comment groups breaking conditionals
|
50
|
+
* e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
|
51
|
+
- fixed quantifiers after comment groups being mis-assigned to that group
|
52
|
+
* e.g. in `/a(?#foo){3}/` (matches 'aaa')
|
53
|
+
- fixed Scanner accepting two cases of invalid Regexp syntax
|
54
|
+
* unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
|
55
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
56
|
+
* they now raise a `Regexp::Scanner::ScannerError`
|
57
|
+
- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
|
58
|
+
- reduced verbosity of inspect / pretty print output
|
59
|
+
|
60
|
+
## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
61
|
+
|
62
|
+
### Added
|
63
|
+
|
64
|
+
- `Regexp::Lexer.lex` now streams tokens when called with a block
|
65
|
+
* it can now take arbitrarily large input, just like `Regexp::Scanner`
|
66
|
+
* this also slightly improves `Regexp::Parser.parse` performance
|
67
|
+
* note: `Regexp::Parser.parse` still does not and will not support streaming
|
68
|
+
- improved performance of `Subexpression#each_expression`
|
69
|
+
- minor improvements to `Regexp::Scanner` performance
|
70
|
+
- overall improvement of parse performance: about 10% for large Regexps
|
71
|
+
|
72
|
+
### Fixed
|
73
|
+
|
74
|
+
- parsing of octal escape sequences in sets, e.g. `[\141]`
|
75
|
+
* thanks to [Randy Stauner](https://github.com/rwstauner) for the report
|
76
|
+
|
10
77
|
## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
78
|
|
12
79
|
### Fixed
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
6
|
+
gem 'leto', '~> 2.0'
|
7
7
|
gem 'rake', '~> 13.0'
|
8
8
|
gem 'regexp_property_values', '~> 1.3'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
|
-
gem 'gouteur'
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
13
|
gem 'rubocop', '~> 1.7'
|
14
14
|
end
|
15
15
|
end
|
data/README.md
CHANGED
@@ -67,7 +67,7 @@ called with the results as follows:
|
|
67
67
|
* **Scanner**: the block gets passed the results as they are scanned. See the
|
68
68
|
example in the next section for details.
|
69
69
|
|
70
|
-
* **Lexer**:
|
70
|
+
* **Lexer**: the block gets passed the tokens one by one as they are scanned.
|
71
71
|
_The result of the block is returned._
|
72
72
|
|
73
73
|
* **Parser**: after completion, the block gets passed the root expression.
|
@@ -126,7 +126,7 @@ parts of the pattern:
|
|
126
126
|
|
127
127
|
```ruby
|
128
128
|
Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
|
129
|
-
|
129
|
+
# => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
|
130
130
|
```
|
131
131
|
|
132
132
|
|
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
|
|
248
248
|
|
249
249
|
```ruby
|
250
250
|
Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
|
251
|
-
|
251
|
+
# => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
|
252
252
|
```
|
253
253
|
|
254
254
|
#### Notes
|
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
|
|
262
262
|
### Parser
|
263
263
|
Sits on top of the lexer and transforms the "stream" of Token objects emitted
|
264
264
|
by it into a tree of Expression objects represented by an instance of the
|
265
|
-
Expression::Root class.
|
265
|
+
`Expression::Root` class.
|
266
266
|
|
267
267
|
See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
|
268
268
|
wiki page for attributes and methods.
|
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
|
|
270
270
|
|
271
271
|
#### Example
|
272
272
|
|
273
|
+
This example uses the tree traversal method `#each_expression`
|
274
|
+
and the method `#strfregexp` to print each object in the tree.
|
275
|
+
|
276
|
+
```ruby
|
277
|
+
include_root = true
|
278
|
+
indent_offset = include_root ? 1 : 0
|
279
|
+
|
280
|
+
tree.each_expression(include_root) do |exp|
|
281
|
+
puts exp.strfregexp("%>> %c", indent_offset)
|
282
|
+
end
|
283
|
+
|
284
|
+
# Output
|
285
|
+
# > Regexp::Expression::Root
|
286
|
+
# > Regexp::Expression::Literal
|
287
|
+
# > Regexp::Expression::Group::Capture
|
288
|
+
# > Regexp::Expression::Literal
|
289
|
+
# > Regexp::Expression::Group::Capture
|
290
|
+
# > Regexp::Expression::Literal
|
291
|
+
# > Regexp::Expression::Literal
|
292
|
+
# > Regexp::Expression::Group::Named
|
293
|
+
# > Regexp::Expression::CharacterSet
|
294
|
+
```
|
295
|
+
|
296
|
+
_Note: quantifiers do not appear in the output because they are members of the
|
297
|
+
Expression class. See the next section for details._
|
298
|
+
|
299
|
+
Another example, using `#traverse` for a more fine-grained tree traversal:
|
300
|
+
|
273
301
|
```ruby
|
274
302
|
require 'regexp_parser'
|
275
303
|
|
@@ -295,34 +323,9 @@ end
|
|
295
323
|
# exit: group `(?<name>[0-9]+)`
|
296
324
|
```
|
297
325
|
|
298
|
-
Another example, using each_expression and strfregexp to print the object tree.
|
299
326
|
_See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
|
300
327
|
for more information on these methods._
|
301
328
|
|
302
|
-
```ruby
|
303
|
-
include_root = true
|
304
|
-
indent_offset = include_root ? 1 : 0
|
305
|
-
|
306
|
-
tree.each_expression(include_root) do |exp, level_index|
|
307
|
-
puts exp.strfregexp("%>> %c", indent_offset)
|
308
|
-
end
|
309
|
-
|
310
|
-
# Output
|
311
|
-
# > Regexp::Expression::Root
|
312
|
-
# > Regexp::Expression::Literal
|
313
|
-
# > Regexp::Expression::Group::Capture
|
314
|
-
# > Regexp::Expression::Literal
|
315
|
-
# > Regexp::Expression::Group::Capture
|
316
|
-
# > Regexp::Expression::Literal
|
317
|
-
# > Regexp::Expression::Literal
|
318
|
-
# > Regexp::Expression::Group::Named
|
319
|
-
# > Regexp::Expression::CharacterSet
|
320
|
-
```
|
321
|
-
|
322
|
-
_Note: quantifiers do not appear in the output because they are members of the
|
323
|
-
Expression class. See the next section for details._
|
324
|
-
|
325
|
-
|
326
329
|
---
|
327
330
|
|
328
331
|
|
@@ -6,13 +6,6 @@ module Regexp::Expression
|
|
6
6
|
init_from_token_and_options(token, options)
|
7
7
|
end
|
8
8
|
|
9
|
-
def initialize_copy(orig)
|
10
|
-
self.text = orig.text.dup if orig.text
|
11
|
-
self.options = orig.options.dup if orig.options
|
12
|
-
self.quantifier = orig.quantifier.clone if orig.quantifier
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
9
|
def to_re(format = :full)
|
17
10
|
if set_level > 0
|
18
11
|
warn "Calling #to_re on character set members is deprecated - "\
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
3
2
|
module Backreference
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
attr_accessor :referenced_expression
|
@@ -27,7 +26,7 @@ module Regexp::Expression
|
|
27
26
|
alias reference number
|
28
27
|
|
29
28
|
def initialize(token, options = {})
|
30
|
-
@number = token.text[
|
29
|
+
@number = token.text[/-?\d+/].to_i
|
31
30
|
super
|
32
31
|
end
|
33
32
|
end
|
@@ -70,4 +69,7 @@ module Regexp::Expression
|
|
70
69
|
end
|
71
70
|
end
|
72
71
|
end
|
72
|
+
|
73
|
+
# alias for symmetry between token symbol and Expression class name
|
74
|
+
Backref = Backreference
|
73
75
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
9
|
complete? and raise Regexp::Parser::Error,
|
@@ -15,10 +14,6 @@ module Regexp::Expression
|
|
15
14
|
def complete?
|
16
15
|
count == 2
|
17
16
|
end
|
18
|
-
|
19
|
-
def parts
|
20
|
-
intersperse(expressions, text.dup)
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -19,9 +19,8 @@ module Regexp::Expression
|
|
19
19
|
def close
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
|
-
|
23
|
-
def parts
|
24
|
-
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
|
-
end
|
26
22
|
end
|
23
|
+
|
24
|
+
# alias for symmetry between token symbol and Expression class name
|
25
|
+
Set = CharacterSet
|
27
26
|
end # module Regexp::Expression
|
@@ -31,9 +31,9 @@ module Regexp::Expression
|
|
31
31
|
expressions.last << exp
|
32
32
|
end
|
33
33
|
|
34
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
35
35
|
raise TooManyBranches.new if branches.length == 2
|
36
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
37
37
|
Branch.add_to(self, params, active_opts)
|
38
38
|
end
|
39
39
|
alias :branch :add_sequence
|
@@ -55,10 +55,6 @@ module Regexp::Expression
|
|
55
55
|
condition.reference
|
56
56
|
end
|
57
57
|
|
58
|
-
def parts
|
59
|
-
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
|
-
end
|
61
|
-
|
62
58
|
def initialize_copy(orig)
|
63
59
|
self.referenced_expression = orig.referenced_expression.dup
|
64
60
|
super
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
2
|
module EscapeSequence
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def codepoint
|
@@ -97,4 +96,7 @@ module Regexp::Expression
|
|
97
96
|
end
|
98
97
|
end
|
99
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
100
102
|
end
|
@@ -5,10 +5,12 @@ module Regexp::Expression
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
9
10
|
|
10
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
11
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
12
14
|
text << exp.text
|
13
15
|
end
|
14
16
|
end
|
@@ -1,13 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def parts
|
5
|
-
[text.dup, *expressions, ')']
|
6
|
-
end
|
7
|
-
|
8
|
-
def capturing?; false end
|
9
|
-
|
10
|
-
def comment?; false end
|
11
4
|
end
|
12
5
|
|
13
6
|
class Passive < Group::Base
|
@@ -18,14 +11,6 @@ module Regexp::Expression
|
|
18
11
|
super
|
19
12
|
end
|
20
13
|
|
21
|
-
def parts
|
22
|
-
if implicit?
|
23
|
-
expressions
|
24
|
-
else
|
25
|
-
super
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
14
|
def implicit?
|
30
15
|
@implicit
|
31
16
|
end
|
@@ -55,8 +40,6 @@ module Regexp::Expression
|
|
55
40
|
class Capture < Group::Base
|
56
41
|
attr_accessor :number, :number_at_level
|
57
42
|
alias identifier number
|
58
|
-
|
59
|
-
def capturing?; true end
|
60
43
|
end
|
61
44
|
|
62
45
|
class Named < Group::Capture
|
@@ -75,11 +58,6 @@ module Regexp::Expression
|
|
75
58
|
end
|
76
59
|
|
77
60
|
class Comment < Group::Base
|
78
|
-
def parts
|
79
|
-
[text.dup]
|
80
|
-
end
|
81
|
-
|
82
|
-
def comment?; true end
|
83
61
|
end
|
84
62
|
end
|
85
63
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -11,7 +10,7 @@ module Regexp::Expression
|
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
14
|
-
|
13
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
14
|
end
|
16
15
|
end
|
17
16
|
|
@@ -116,4 +115,8 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
118
|
+
|
119
|
+
# alias for symmetry between token symbol and Expression class name
|
120
|
+
Property = UnicodeProperty
|
121
|
+
Nonproperty = UnicodeProperty
|
119
122
|
end # module Regexp::Expression
|
@@ -25,11 +25,9 @@ module Regexp::Expression
|
|
25
25
|
def token_class
|
26
26
|
if self == Root || self < Sequence
|
27
27
|
nil # no token class because these objects are Parser-generated
|
28
|
-
# TODO: synch exp
|
29
|
-
elsif self ==
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
30
|
Regexp::Syntax::Token::Meta
|
31
|
-
elsif self <= EscapeSequence::Base
|
32
|
-
Regexp::Syntax::Token::Escape
|
33
31
|
else
|
34
32
|
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
33
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -95,12 +95,49 @@ module Regexp::Expression
|
|
95
95
|
end
|
96
96
|
|
97
97
|
# Deep-compare two expressions for equality.
|
98
|
+
#
|
99
|
+
# When changing the conditions, please make sure to update
|
100
|
+
# #pretty_print_instance_variables so that it includes all relevant values.
|
98
101
|
def ==(other)
|
99
|
-
|
100
|
-
other.
|
101
|
-
other.
|
102
|
+
self.class == other.class &&
|
103
|
+
text == other.text &&
|
104
|
+
quantifier == other.quantifier &&
|
105
|
+
options == other.options &&
|
106
|
+
(terminal? || expressions == other.expressions)
|
102
107
|
end
|
103
108
|
alias :=== :==
|
104
109
|
alias :eql? :==
|
110
|
+
|
111
|
+
def optional?
|
112
|
+
quantified? && quantifier.min == 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def quantified?
|
116
|
+
!quantifier.nil?
|
117
|
+
end
|
105
118
|
end
|
119
|
+
|
120
|
+
Shared.class_eval { def terminal?; self.class.terminal? end }
|
121
|
+
Shared::ClassMethods.class_eval { def terminal?; true end }
|
122
|
+
Subexpression.instance_eval { def terminal?; false end }
|
123
|
+
|
124
|
+
Shared.class_eval { def capturing?; self.class.capturing? end }
|
125
|
+
Shared::ClassMethods.class_eval { def capturing?; false end }
|
126
|
+
Group::Capture.instance_eval { def capturing?; true end }
|
127
|
+
|
128
|
+
Shared.class_eval { def comment?; self.class.comment? end }
|
129
|
+
Shared::ClassMethods.class_eval { def comment?; false end }
|
130
|
+
Comment.instance_eval { def comment?; true end }
|
131
|
+
Group::Comment.instance_eval { def comment?; true end }
|
132
|
+
|
133
|
+
Shared.class_eval { def decorative?; self.class.decorative? end }
|
134
|
+
Shared::ClassMethods.class_eval { def decorative?; false end }
|
135
|
+
FreeSpace.instance_eval { def decorative?; true end }
|
136
|
+
Group::Comment.instance_eval { def decorative?; true end }
|
137
|
+
|
138
|
+
Shared.class_eval { def referential?; self.class.referential? end }
|
139
|
+
Shared::ClassMethods.class_eval { def referential?; false end }
|
140
|
+
Backreference::Base.instance_eval { def referential?; true end }
|
141
|
+
Conditional::Condition.instance_eval { def referential?; true end }
|
142
|
+
Conditional::Expression.instance_eval { def referential?; true end }
|
106
143
|
end
|
@@ -1,6 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Subexpression < Regexp::Expression::Base
|
3
3
|
|
4
|
+
# Traverses the expression, passing each recursive child to the
|
5
|
+
# given block.
|
6
|
+
# If the block takes two arguments, the indices of the children within
|
7
|
+
# their parents are also passed to it.
|
8
|
+
def each_expression(include_self = false, &block)
|
9
|
+
return enum_for(__method__, include_self) unless block
|
10
|
+
|
11
|
+
if block.arity == 1
|
12
|
+
block.call(self) if include_self
|
13
|
+
each_expression_without_index(&block)
|
14
|
+
else
|
15
|
+
block.call(self, 0) if include_self
|
16
|
+
each_expression_with_index(&block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
4
20
|
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
21
|
# block for each expression with three arguments; the traversal event,
|
6
22
|
# the expression, and the index of the expression within its parent.
|
@@ -34,31 +50,31 @@ module Regexp::Expression
|
|
34
50
|
end
|
35
51
|
alias :walk :traverse
|
36
52
|
|
37
|
-
# Iterates over the expressions of this expression as an array, passing
|
38
|
-
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false)
|
40
|
-
return enum_for(__method__, include_self) unless block_given?
|
41
|
-
|
42
|
-
traverse(include_self) do |event, exp, index|
|
43
|
-
yield(exp, index) unless event == :exit
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
53
|
# Returns a new array with the results of calling the given block once
|
48
54
|
# for every expression. If a block is not given, returns an array with
|
49
55
|
# each expression and its level index as an array.
|
50
|
-
def flat_map(include_self = false)
|
51
|
-
|
56
|
+
def flat_map(include_self = false, &block)
|
57
|
+
case block && block.arity
|
58
|
+
when nil then each_expression(include_self).to_a
|
59
|
+
when 2 then each_expression(include_self).map(&block)
|
60
|
+
else each_expression(include_self).map { |exp| block.call(exp) }
|
61
|
+
end
|
62
|
+
end
|
52
63
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
64
|
+
protected
|
65
|
+
|
66
|
+
def each_expression_with_index(&block)
|
67
|
+
each_with_index do |exp, index|
|
68
|
+
block.call(exp, index)
|
69
|
+
exp.each_expression_with_index(&block) unless exp.terminal?
|
59
70
|
end
|
71
|
+
end
|
60
72
|
|
61
|
-
|
73
|
+
def each_expression_without_index(&block)
|
74
|
+
each do |exp|
|
75
|
+
block.call(exp)
|
76
|
+
exp.each_expression_without_index(&block) unless exp.terminal?
|
77
|
+
end
|
62
78
|
end
|
63
79
|
end
|
64
80
|
end
|