regexp_parser 2.7.0 → 2.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +62 -3
- data/Gemfile +3 -3
- data/LICENSE +1 -1
- data/README.md +33 -30
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +4 -6
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +3 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/unicode_property.rb +5 -2
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +33 -20
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -9
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -24
- data/lib/regexp_parser/expression/subexpression.rb +20 -18
- data/lib/regexp_parser/expression.rb +2 -0
- data/lib/regexp_parser/lexer.rb +15 -7
- data/lib/regexp_parser/parser.rb +85 -86
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +11 -0
- data/lib/regexp_parser/scanner/properties/short.csv +2 -0
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +35 -129
- data/lib/regexp_parser/scanner.rb +1084 -1303
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +17 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/version.rb +1 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
|
4
|
+
data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
|
7
|
+
data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1
|
data/CHANGELOG.md
CHANGED
@@ -5,14 +5,73 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
+
## [Unreleased]
|
9
|
+
|
10
|
+
## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
- support for extpict unicode property, added in Ruby 2.6
|
15
|
+
- support for 10 unicode script/block properties added in Ruby 3.2
|
16
|
+
|
17
|
+
## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
18
|
+
|
19
|
+
### Added
|
20
|
+
|
21
|
+
- `Regexp::Expression::Shared#ends_at`
|
22
|
+
* e.g. `parse(/a +/x)[0].ends_at # => 3`
|
23
|
+
* e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
|
24
|
+
- `Regexp::Expression::Shared#{capturing?,comment?}`
|
25
|
+
* previously only available on capturing and comment groups
|
26
|
+
- `Regexp::Expression::Shared#{decorative?}`
|
27
|
+
* true for decorations: comment groups as well as comments and whitespace in x-mode
|
28
|
+
- `Regexp::Expression::Shared#parent`
|
29
|
+
- new format argument `:original` for `Regexp::Expression::Base#to_s`
|
30
|
+
* includes decorative elements between node and its quantifier
|
31
|
+
* e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
|
32
|
+
* using it is not needed when calling `Root#to_s` as Root can't be quantified
|
33
|
+
- support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
|
34
|
+
* in this case, only the expressions are passed to the block, no indices
|
35
|
+
- support calling test methods at Expression class level
|
36
|
+
- `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
|
37
|
+
- e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
|
38
|
+
|
39
|
+
### Fixed
|
40
|
+
|
41
|
+
- `Regexp::Expression::Shared#full_length` with whitespace before quantifier
|
42
|
+
* e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
|
43
|
+
- `Subexpression#to_s` output with children with whitespace before their quantifier
|
44
|
+
* e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
|
45
|
+
* calling `#to_s` on sub-nodes still omits such decorative interludes by default
|
46
|
+
- use new `#to_s` format `:original` to include it
|
47
|
+
- e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
|
48
|
+
- fixed `Subexpression#te` behaving differently from other expressions
|
49
|
+
* only `Subexpression#te` used to include the quantifier
|
50
|
+
* now `#te` is the end index without quantifier, as for other expressions
|
51
|
+
- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
|
52
|
+
* e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
|
53
|
+
* e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
|
54
|
+
- fixed nested comment groups breaking local x-options
|
55
|
+
* e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
|
56
|
+
- fixed nested comment groups breaking conditionals
|
57
|
+
* e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
|
58
|
+
- fixed quantifiers after comment groups being mis-assigned to that group
|
59
|
+
* e.g. in `/a(?#foo){3}/` (matches 'aaa')
|
60
|
+
- fixed Scanner accepting two cases of invalid Regexp syntax
|
61
|
+
* unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
|
62
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
63
|
+
* they now raise a `Regexp::Scanner::ScannerError`
|
64
|
+
- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
|
65
|
+
- reduced verbosity of inspect / pretty print output
|
66
|
+
|
8
67
|
## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
9
68
|
|
10
69
|
### Added
|
11
70
|
|
12
71
|
- `Regexp::Lexer.lex` now streams tokens when called with a block
|
13
|
-
|
14
|
-
|
15
|
-
|
72
|
+
* it can now take arbitrarily large input, just like `Regexp::Scanner`
|
73
|
+
* this also slightly improves `Regexp::Parser.parse` performance
|
74
|
+
* note: `Regexp::Parser.parse` still does not and will not support streaming
|
16
75
|
- improved performance of `Subexpression#each_expression`
|
17
76
|
- minor improvements to `Regexp::Scanner` performance
|
18
77
|
- overall improvement of parse performance: about 10% for large Regexps
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
6
|
+
gem 'leto', '~> 2.0'
|
7
7
|
gem 'rake', '~> 13.0'
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
+
gem 'regexp_property_values', '~> 1.4'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
|
-
gem 'gouteur'
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
13
|
gem 'rubocop', '~> 1.7'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -67,7 +67,7 @@ called with the results as follows:
|
|
67
67
|
* **Scanner**: the block gets passed the results as they are scanned. See the
|
68
68
|
example in the next section for details.
|
69
69
|
|
70
|
-
* **Lexer**:
|
70
|
+
* **Lexer**: the block gets passed the tokens one by one as they are scanned.
|
71
71
|
_The result of the block is returned._
|
72
72
|
|
73
73
|
* **Parser**: after completion, the block gets passed the root expression.
|
@@ -126,7 +126,7 @@ parts of the pattern:
|
|
126
126
|
|
127
127
|
```ruby
|
128
128
|
Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
|
129
|
-
|
129
|
+
# => ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
|
130
130
|
```
|
131
131
|
|
132
132
|
|
@@ -248,7 +248,7 @@ by a quantifier that only applies to it.
|
|
248
248
|
|
249
249
|
```ruby
|
250
250
|
Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
|
251
|
-
|
251
|
+
# => ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
|
252
252
|
```
|
253
253
|
|
254
254
|
#### Notes
|
@@ -262,7 +262,7 @@ Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
|
|
262
262
|
### Parser
|
263
263
|
Sits on top of the lexer and transforms the "stream" of Token objects emitted
|
264
264
|
by it into a tree of Expression objects represented by an instance of the
|
265
|
-
Expression::Root class.
|
265
|
+
`Expression::Root` class.
|
266
266
|
|
267
267
|
See the [Expression Objects](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
|
268
268
|
wiki page for attributes and methods.
|
@@ -270,6 +270,34 @@ wiki page for attributes and methods.
|
|
270
270
|
|
271
271
|
#### Example
|
272
272
|
|
273
|
+
This example uses the tree traversal method `#each_expression`
|
274
|
+
and the method `#strfregexp` to print each object in the tree.
|
275
|
+
|
276
|
+
```ruby
|
277
|
+
include_root = true
|
278
|
+
indent_offset = include_root ? 1 : 0
|
279
|
+
|
280
|
+
tree.each_expression(include_root) do |exp|
|
281
|
+
puts exp.strfregexp("%>> %c", indent_offset)
|
282
|
+
end
|
283
|
+
|
284
|
+
# Output
|
285
|
+
# > Regexp::Expression::Root
|
286
|
+
# > Regexp::Expression::Literal
|
287
|
+
# > Regexp::Expression::Group::Capture
|
288
|
+
# > Regexp::Expression::Literal
|
289
|
+
# > Regexp::Expression::Group::Capture
|
290
|
+
# > Regexp::Expression::Literal
|
291
|
+
# > Regexp::Expression::Literal
|
292
|
+
# > Regexp::Expression::Group::Named
|
293
|
+
# > Regexp::Expression::CharacterSet
|
294
|
+
```
|
295
|
+
|
296
|
+
_Note: quantifiers do not appear in the output because they are members of the
|
297
|
+
Expression class. See the next section for details._
|
298
|
+
|
299
|
+
Another example, using `#traverse` for a more fine-grained tree traversal:
|
300
|
+
|
273
301
|
```ruby
|
274
302
|
require 'regexp_parser'
|
275
303
|
|
@@ -295,34 +323,9 @@ end
|
|
295
323
|
# exit: group `(?<name>[0-9]+)`
|
296
324
|
```
|
297
325
|
|
298
|
-
Another example, using each_expression and strfregexp to print the object tree.
|
299
326
|
_See the traverse.rb and strfregexp.rb files under `lib/regexp_parser/expression/methods`
|
300
327
|
for more information on these methods._
|
301
328
|
|
302
|
-
```ruby
|
303
|
-
include_root = true
|
304
|
-
indent_offset = include_root ? 1 : 0
|
305
|
-
|
306
|
-
tree.each_expression(include_root) do |exp, level_index|
|
307
|
-
puts exp.strfregexp("%>> %c", indent_offset)
|
308
|
-
end
|
309
|
-
|
310
|
-
# Output
|
311
|
-
# > Regexp::Expression::Root
|
312
|
-
# > Regexp::Expression::Literal
|
313
|
-
# > Regexp::Expression::Group::Capture
|
314
|
-
# > Regexp::Expression::Literal
|
315
|
-
# > Regexp::Expression::Group::Capture
|
316
|
-
# > Regexp::Expression::Literal
|
317
|
-
# > Regexp::Expression::Literal
|
318
|
-
# > Regexp::Expression::Group::Named
|
319
|
-
# > Regexp::Expression::CharacterSet
|
320
|
-
```
|
321
|
-
|
322
|
-
_Note: quantifiers do not appear in the output because they are members of the
|
323
|
-
Expression class. See the next section for details._
|
324
|
-
|
325
|
-
|
326
329
|
---
|
327
330
|
|
328
331
|
|
@@ -500,4 +503,4 @@ Documentation and books used while working on this project.
|
|
500
503
|
|
501
504
|
---
|
502
505
|
##### Copyright
|
503
|
-
_Copyright (c) 2010-
|
506
|
+
_Copyright (c) 2010-2023 Ammar Ali. See LICENSE file for details._
|
@@ -6,13 +6,6 @@ module Regexp::Expression
|
|
6
6
|
init_from_token_and_options(token, options)
|
7
7
|
end
|
8
8
|
|
9
|
-
def initialize_copy(orig)
|
10
|
-
self.text = orig.text.dup if orig.text
|
11
|
-
self.options = orig.options.dup if orig.options
|
12
|
-
self.quantifier = orig.quantifier.clone if orig.quantifier
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
9
|
def to_re(format = :full)
|
17
10
|
if set_level > 0
|
18
11
|
warn "Calling #to_re on character set members is deprecated - "\
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
3
2
|
module Backreference
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
attr_accessor :referenced_expression
|
@@ -20,10 +19,6 @@ module Regexp::Expression
|
|
20
19
|
|
21
20
|
super
|
22
21
|
end
|
23
|
-
|
24
|
-
def referential?
|
25
|
-
true
|
26
|
-
end
|
27
22
|
end
|
28
23
|
|
29
24
|
class Number < Backreference::Base
|
@@ -31,7 +26,7 @@ module Regexp::Expression
|
|
31
26
|
alias reference number
|
32
27
|
|
33
28
|
def initialize(token, options = {})
|
34
|
-
@number = token.text[
|
29
|
+
@number = token.text[/-?\d+/].to_i
|
35
30
|
super
|
36
31
|
end
|
37
32
|
end
|
@@ -74,4 +69,7 @@ module Regexp::Expression
|
|
74
69
|
end
|
75
70
|
end
|
76
71
|
end
|
72
|
+
|
73
|
+
# alias for symmetry between token symbol and Expression class name
|
74
|
+
Backref = Backreference
|
77
75
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
9
|
complete? and raise Regexp::Parser::Error,
|
@@ -15,10 +14,6 @@ module Regexp::Expression
|
|
15
14
|
def complete?
|
16
15
|
count == 2
|
17
16
|
end
|
18
|
-
|
19
|
-
def parts
|
20
|
-
intersperse(expressions, text.dup)
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -19,9 +19,8 @@ module Regexp::Expression
|
|
19
19
|
def close
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
|
-
|
23
|
-
def parts
|
24
|
-
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
|
-
end
|
26
22
|
end
|
23
|
+
|
24
|
+
# alias for symmetry between token symbol and Expression class name
|
25
|
+
Set = CharacterSet
|
27
26
|
end # module Regexp::Expression
|
@@ -20,10 +20,6 @@ module Regexp::Expression
|
|
20
20
|
self.referenced_expression = orig.referenced_expression.dup
|
21
21
|
super
|
22
22
|
end
|
23
|
-
|
24
|
-
def referential?
|
25
|
-
true
|
26
|
-
end
|
27
23
|
end
|
28
24
|
|
29
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -35,9 +31,9 @@ module Regexp::Expression
|
|
35
31
|
expressions.last << exp
|
36
32
|
end
|
37
33
|
|
38
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
39
35
|
raise TooManyBranches.new if branches.length == 2
|
40
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
41
37
|
Branch.add_to(self, params, active_opts)
|
42
38
|
end
|
43
39
|
alias :branch :add_sequence
|
@@ -59,14 +55,6 @@ module Regexp::Expression
|
|
59
55
|
condition.reference
|
60
56
|
end
|
61
57
|
|
62
|
-
def referential?
|
63
|
-
true
|
64
|
-
end
|
65
|
-
|
66
|
-
def parts
|
67
|
-
[text.dup, condition, *intersperse(branches, '|'), ')']
|
68
|
-
end
|
69
|
-
|
70
58
|
def initialize_copy(orig)
|
71
59
|
self.referenced_expression = orig.referenced_expression.dup
|
72
60
|
super
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
2
|
module EscapeSequence
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def codepoint
|
@@ -97,4 +96,7 @@ module Regexp::Expression
|
|
97
96
|
end
|
98
97
|
end
|
99
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
100
102
|
end
|
@@ -5,10 +5,12 @@ module Regexp::Expression
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
9
10
|
|
10
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
11
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
12
14
|
text << exp.text
|
13
15
|
end
|
14
16
|
end
|
@@ -1,13 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def parts
|
5
|
-
[text.dup, *expressions, ')']
|
6
|
-
end
|
7
|
-
|
8
|
-
def capturing?; false end
|
9
|
-
|
10
|
-
def comment?; false end
|
11
4
|
end
|
12
5
|
|
13
6
|
class Passive < Group::Base
|
@@ -18,14 +11,6 @@ module Regexp::Expression
|
|
18
11
|
super
|
19
12
|
end
|
20
13
|
|
21
|
-
def parts
|
22
|
-
if implicit?
|
23
|
-
expressions
|
24
|
-
else
|
25
|
-
super
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
14
|
def implicit?
|
30
15
|
@implicit
|
31
16
|
end
|
@@ -55,8 +40,6 @@ module Regexp::Expression
|
|
55
40
|
class Capture < Group::Base
|
56
41
|
attr_accessor :number, :number_at_level
|
57
42
|
alias identifier number
|
58
|
-
|
59
|
-
def capturing?; true end
|
60
43
|
end
|
61
44
|
|
62
45
|
class Named < Group::Capture
|
@@ -75,11 +58,6 @@ module Regexp::Expression
|
|
75
58
|
end
|
76
59
|
|
77
60
|
class Comment < Group::Base
|
78
|
-
def parts
|
79
|
-
[text.dup]
|
80
|
-
end
|
81
|
-
|
82
|
-
def comment?; true end
|
83
61
|
end
|
84
62
|
end
|
85
63
|
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -11,7 +10,7 @@ module Regexp::Expression
|
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
14
|
-
|
13
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
14
|
end
|
16
15
|
end
|
17
16
|
|
@@ -116,4 +115,8 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
118
|
+
|
119
|
+
# alias for symmetry between token symbol and Expression class name
|
120
|
+
Property = UnicodeProperty
|
121
|
+
Nonproperty = UnicodeProperty
|
119
122
|
end # module Regexp::Expression
|
@@ -25,11 +25,9 @@ module Regexp::Expression
|
|
25
25
|
def token_class
|
26
26
|
if self == Root || self < Sequence
|
27
27
|
nil # no token class because these objects are Parser-generated
|
28
|
-
# TODO: synch exp
|
29
|
-
elsif self ==
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
30
|
Regexp::Syntax::Token::Meta
|
31
|
-
elsif self <= EscapeSequence::Base
|
32
|
-
Regexp::Syntax::Token::Escape
|
33
31
|
else
|
34
32
|
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
33
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -95,12 +95,49 @@ module Regexp::Expression
|
|
95
95
|
end
|
96
96
|
|
97
97
|
# Deep-compare two expressions for equality.
|
98
|
+
#
|
99
|
+
# When changing the conditions, please make sure to update
|
100
|
+
# #pretty_print_instance_variables so that it includes all relevant values.
|
98
101
|
def ==(other)
|
99
|
-
|
100
|
-
other.
|
101
|
-
other.
|
102
|
+
self.class == other.class &&
|
103
|
+
text == other.text &&
|
104
|
+
quantifier == other.quantifier &&
|
105
|
+
options == other.options &&
|
106
|
+
(terminal? || expressions == other.expressions)
|
102
107
|
end
|
103
108
|
alias :=== :==
|
104
109
|
alias :eql? :==
|
110
|
+
|
111
|
+
def optional?
|
112
|
+
quantified? && quantifier.min == 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def quantified?
|
116
|
+
!quantifier.nil?
|
117
|
+
end
|
105
118
|
end
|
119
|
+
|
120
|
+
Shared.class_eval { def terminal?; self.class.terminal? end }
|
121
|
+
Shared::ClassMethods.class_eval { def terminal?; true end }
|
122
|
+
Subexpression.instance_eval { def terminal?; false end }
|
123
|
+
|
124
|
+
Shared.class_eval { def capturing?; self.class.capturing? end }
|
125
|
+
Shared::ClassMethods.class_eval { def capturing?; false end }
|
126
|
+
Group::Capture.instance_eval { def capturing?; true end }
|
127
|
+
|
128
|
+
Shared.class_eval { def comment?; self.class.comment? end }
|
129
|
+
Shared::ClassMethods.class_eval { def comment?; false end }
|
130
|
+
Comment.instance_eval { def comment?; true end }
|
131
|
+
Group::Comment.instance_eval { def comment?; true end }
|
132
|
+
|
133
|
+
Shared.class_eval { def decorative?; self.class.decorative? end }
|
134
|
+
Shared::ClassMethods.class_eval { def decorative?; false end }
|
135
|
+
FreeSpace.instance_eval { def decorative?; true end }
|
136
|
+
Group::Comment.instance_eval { def decorative?; true end }
|
137
|
+
|
138
|
+
Shared.class_eval { def referential?; self.class.referential? end }
|
139
|
+
Shared::ClassMethods.class_eval { def referential?; false end }
|
140
|
+
Backreference::Base.instance_eval { def referential?; true end }
|
141
|
+
Conditional::Condition.instance_eval { def referential?; true end }
|
142
|
+
Conditional::Expression.instance_eval { def referential?; true end }
|
106
143
|
end
|
@@ -1,6 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Subexpression < Regexp::Expression::Base
|
3
3
|
|
4
|
+
# Traverses the expression, passing each recursive child to the
|
5
|
+
# given block.
|
6
|
+
# If the block takes two arguments, the indices of the children within
|
7
|
+
# their parents are also passed to it.
|
8
|
+
def each_expression(include_self = false, &block)
|
9
|
+
return enum_for(__method__, include_self) unless block
|
10
|
+
|
11
|
+
if block.arity == 1
|
12
|
+
block.call(self) if include_self
|
13
|
+
each_expression_without_index(&block)
|
14
|
+
else
|
15
|
+
block.call(self, 0) if include_self
|
16
|
+
each_expression_with_index(&block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
4
20
|
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
21
|
# block for each expression with three arguments; the traversal event,
|
6
22
|
# the expression, and the index of the expression within its parent.
|
@@ -34,34 +50,31 @@ module Regexp::Expression
|
|
34
50
|
end
|
35
51
|
alias :walk :traverse
|
36
52
|
|
37
|
-
#
|
38
|
-
#
|
39
|
-
|
40
|
-
|
53
|
+
# Returns a new array with the results of calling the given block once
|
54
|
+
# for every expression. If a block is not given, returns an array with
|
55
|
+
# each expression and its level index as an array.
|
56
|
+
def flat_map(include_self = false, &block)
|
57
|
+
case block && block.arity
|
58
|
+
when nil then each_expression(include_self).to_a
|
59
|
+
when 2 then each_expression(include_self).map(&block)
|
60
|
+
else each_expression(include_self).map { |exp| block.call(exp) }
|
61
|
+
end
|
62
|
+
end
|
41
63
|
|
42
|
-
|
64
|
+
protected
|
43
65
|
|
66
|
+
def each_expression_with_index(&block)
|
44
67
|
each_with_index do |exp, index|
|
45
68
|
block.call(exp, index)
|
46
|
-
exp.
|
69
|
+
exp.each_expression_with_index(&block) unless exp.terminal?
|
47
70
|
end
|
48
71
|
end
|
49
72
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
result = []
|
55
|
-
|
56
|
-
each_expression(include_self) do |exp, index|
|
57
|
-
if block_given?
|
58
|
-
result << yield(exp, index)
|
59
|
-
else
|
60
|
-
result << [exp, index]
|
61
|
-
end
|
73
|
+
def each_expression_without_index(&block)
|
74
|
+
each do |exp|
|
75
|
+
block.call(exp)
|
76
|
+
exp.each_expression_without_index(&block) unless exp.terminal?
|
62
77
|
end
|
63
|
-
|
64
|
-
result
|
65
78
|
end
|
66
79
|
end
|
67
80
|
end
|