regexp_parser 2.4.0 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +66 -41
- data/README.md +46 -30
- data/lib/regexp_parser/expression/base.rb +17 -9
- data/lib/regexp_parser/expression/classes/backreference.rb +2 -1
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +10 -0
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +3 -5
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +6 -5
- data/lib/regexp_parser/expression/sequence.rb +7 -21
- data/lib/regexp_parser/expression/shared.rb +15 -2
- data/lib/regexp_parser/expression.rb +4 -2
- data/lib/regexp_parser/parser.rb +26 -17
- data/lib/regexp_parser/syntax/token/escape.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cadf1761e17469c6bf76db652a4f6fc97a3d33b7eaa46e6ea16f95ee6661743d
|
|
4
|
+
data.tar.gz: 3d6252f67f201b3cb6a3b94721c65b39abfe7b13bf0097fc9144498f6fdf8837
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3fb24f56b5d8da354aa5825dc2e9432c7e8bd836c9c2a7009c8883e367fb8ca61020a04854c714cacff913281b1156b4663334696edcb1d7e9239d8c8184d439
|
|
7
|
+
data.tar.gz: e793b72a9394e26bf0b9e6cb58c7536b72c30562382713f8b60735969f3b3b9b3aea78bf45efa661397d7141c2684a6df2b32cc8b449c413ea9d11c90c5396db
|
data/CHANGELOG.md
CHANGED
|
@@ -1,33 +1,68 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
1
8
|
## [Unreleased]
|
|
2
9
|
|
|
10
|
+
## [2.6.0] - 2022-09-26 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- fixed `#referenced_expression` for `\g<0>` (was `nil`, is now the `Root` exp)
|
|
15
|
+
- fixed `#reference`, `#referenced_expression` for recursion level backrefs
|
|
16
|
+
* e.g. `(a)(b)\k<-1+1>`
|
|
17
|
+
* `#referenced_expression` was `nil`, now it is the correct `Group` exp
|
|
18
|
+
- detect and raise for two more syntax errors when parsing String input
|
|
19
|
+
* quantification of option switches (e.g. `(?i)+`)
|
|
20
|
+
* invalid references (e.g. `/\k<1>/`)
|
|
21
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
|
22
|
+
|
|
23
|
+
### Added
|
|
24
|
+
|
|
25
|
+
- `Regexp::Expression::Base#human_name`
|
|
26
|
+
* returns a nice, human-readable description of the expression
|
|
27
|
+
- `Regexp::Expression::Base#optional?`
|
|
28
|
+
* returns `true` if the expression is quantified accordingly (e.g. with `*`, `{,n}`)
|
|
29
|
+
- added a deprecation warning when calling `#to_re` on set members
|
|
30
|
+
|
|
31
|
+
## [2.5.0] - 2022-05-27 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
32
|
+
|
|
33
|
+
### Added
|
|
34
|
+
|
|
35
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
|
36
|
+
* see the [wiki](https://github.com/ammar/regexp_parser/wiki) for details
|
|
37
|
+
|
|
3
38
|
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
4
39
|
|
|
5
40
|
### Fixed
|
|
6
41
|
|
|
7
42
|
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
43
|
+
* they used to be treated as reluctant or possessive mode indicators
|
|
44
|
+
* however, Ruby does not support these modes for interval quantifiers
|
|
45
|
+
* they are now treated as chained quantifiers instead, as Ruby does it
|
|
46
|
+
* c.f. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
|
12
47
|
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
|
13
|
-
|
|
48
|
+
* e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
|
14
49
|
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
|
15
|
-
|
|
16
|
-
|
|
50
|
+
* they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
|
|
51
|
+
* they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
|
17
52
|
|
|
18
53
|
### Added
|
|
19
54
|
|
|
20
55
|
- added `Expression::Base#==` for (deep) comparison of expressions
|
|
21
56
|
- added `Expression::Base#parts`
|
|
22
|
-
|
|
23
|
-
|
|
57
|
+
* returns the text elements and subexpressions of an expression
|
|
58
|
+
* e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
|
24
59
|
- added `Expression::Base#te` (a.k.a. token end index)
|
|
25
|
-
|
|
60
|
+
* `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
|
26
61
|
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
62
|
+
* `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
|
63
|
+
* `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
|
64
|
+
* `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
|
|
65
|
+
* this allows a more unified handling with `Expression::Base` instances
|
|
31
66
|
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
|
32
67
|
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
|
33
68
|
|
|
@@ -36,10 +71,12 @@
|
|
|
36
71
|
|
|
37
72
|
It will no longer be supported in regexp_parser v3.0.0.
|
|
38
73
|
|
|
39
|
-
Please pass a Regexp::Token instead, e.g. replace `
|
|
40
|
-
with `::Regexp::Token.new(:quantifier,
|
|
74
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
|
75
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
|
41
76
|
will be derived automatically.
|
|
42
77
|
|
|
78
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
|
79
|
+
|
|
43
80
|
This is consistent with how Expression::Base instances are created.
|
|
44
81
|
|
|
45
82
|
|
|
@@ -48,18 +85,18 @@
|
|
|
48
85
|
### Fixed
|
|
49
86
|
|
|
50
87
|
- removed five inexistent unicode properties from `Syntax#features`
|
|
51
|
-
|
|
52
|
-
|
|
88
|
+
* these were never supported by Ruby or the `Regexp::Scanner`
|
|
89
|
+
* thanks to [Markus Schirp](https://github.com/mbj) for the report
|
|
53
90
|
|
|
54
91
|
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
55
92
|
|
|
56
93
|
### Added
|
|
57
94
|
|
|
58
95
|
- improved parsing performance through `Syntax` refactoring
|
|
59
|
-
|
|
60
|
-
|
|
96
|
+
* instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
|
97
|
+
* this approximately doubles the parsing speed for simple regexps
|
|
61
98
|
- added methods to `Syntax` classes to show relative feature sets
|
|
62
|
-
|
|
99
|
+
* e.g. `Regexp::Syntax::V3_2_0.added_features`
|
|
63
100
|
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
|
64
101
|
|
|
65
102
|
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
@@ -67,14 +104,14 @@
|
|
|
67
104
|
### Fixed
|
|
68
105
|
|
|
69
106
|
- fixed Syntax version of absence groups (`(?~...)`)
|
|
70
|
-
|
|
71
|
-
|
|
107
|
+
* the lexer accepted them for any Ruby version
|
|
108
|
+
* now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
|
72
109
|
- reduced gem size by excluding specs from package
|
|
73
110
|
- removed deprecated `test_files` gemspec setting
|
|
74
111
|
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
|
75
112
|
- no longer depend on `set`
|
|
76
|
-
|
|
77
|
-
|
|
113
|
+
* `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
|
114
|
+
* this made it a hidden/undeclared dependency of `regexp_parser`
|
|
78
115
|
|
|
79
116
|
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
80
117
|
|
|
@@ -312,8 +349,8 @@
|
|
|
312
349
|
|
|
313
350
|
- Fixed missing quantifier in `Conditional::Expression` methods `#to_s`, `#to_re`
|
|
314
351
|
- `Conditional::Condition` no longer lives outside the recursive `#expressions` tree
|
|
315
|
-
|
|
316
|
-
|
|
352
|
+
* it used to be the only expression stored in a custom ivar, complicating traversal
|
|
353
|
+
* its setter and getter (`#condition=`, `#condition`) still work as before
|
|
317
354
|
|
|
318
355
|
## [1.1.0] - 2018-09-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
319
356
|
|
|
@@ -321,8 +358,8 @@
|
|
|
321
358
|
|
|
322
359
|
- Added `Quantifier` methods `#greedy?`, `#possessive?`, `#reluctant?`/`#lazy?`
|
|
323
360
|
- Added `Group::Options#option_changes`
|
|
324
|
-
|
|
325
|
-
|
|
361
|
+
* shows the options enabled or disabled by the given options group
|
|
362
|
+
* as with all other expressions, `#options` shows the overall active options
|
|
326
363
|
- Added `Conditional#reference` and `Condition#reference`, indicating the determinative group
|
|
327
364
|
- Added `Subexpression#dig`, acts like [`Array#dig`](http://ruby-doc.org/core-2.5.0/Array.html#method-i-dig)
|
|
328
365
|
|
|
@@ -506,7 +543,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
|
506
543
|
* Fixed scanning of zero length comments (PR #12)
|
|
507
544
|
* Fixed missing escape:codepoint_list syntax token (PR #14)
|
|
508
545
|
* Fixed to_s for modified interval quantifiers (PR #17)
|
|
509
|
-
- Added a note about MRI implementation quirks to Scanner section
|
|
510
546
|
|
|
511
547
|
## [0.3.2] - 2016-01-01 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
|
512
548
|
|
|
@@ -532,7 +568,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
|
532
568
|
- Renamed Lexer's method to lex, added an alias to the old name (scan)
|
|
533
569
|
- Use #map instead of #each to run the block in Lexer.lex.
|
|
534
570
|
- Replaced VERSION.yml file with a constant.
|
|
535
|
-
- Updated README
|
|
536
571
|
- Update tokens and scanner with new additions in Unicode 7.0.
|
|
537
572
|
|
|
538
573
|
## [0.1.6] - 2014-10-06 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
|
@@ -542,20 +577,11 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
|
542
577
|
- Added syntax files for missing ruby 2.x versions. These do not add
|
|
543
578
|
extra syntax support, they just make the gem work with the newer
|
|
544
579
|
ruby versions.
|
|
545
|
-
- Added .travis.yml to project root.
|
|
546
|
-
- README:
|
|
547
|
-
- Removed note purporting runtime support for ruby 1.8.6.
|
|
548
|
-
- Added a section identifying the main unsupported syntax features.
|
|
549
|
-
- Added sections for Testing and Building
|
|
550
|
-
- Added badges for gem version, Travis CI, and code climate.
|
|
551
|
-
- Updated README, fixing broken examples, and converting it from a rdoc file to Github's flavor of Markdown.
|
|
552
580
|
- Fixed a parser bug where an alternation sequence that contained nested expressions was incorrectly being appended to the parent expression when the nesting was exited. e.g. in /a|(b)c/, c was appended to the root.
|
|
553
|
-
|
|
554
581
|
- Fixed a bug where character types were not being correctly scanned within character sets. e.g. in [\d], two tokens were scanned; one for the backslash '\' and one for the 'd'
|
|
555
582
|
|
|
556
583
|
## [0.1.5] - 2014-01-14 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
|
557
584
|
|
|
558
|
-
- Correct ChangeLog.
|
|
559
585
|
- Added syntax stubs for ruby versions 2.0 and 2.1
|
|
560
586
|
- Added clone methods for deep copying expressions.
|
|
561
587
|
- Added optional format argument for to_s on expressions to return the text of the expression with (:full, the default) or without (:base) its quantifier.
|
|
@@ -564,7 +590,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
|
564
590
|
- Improved EOF handling in general and especially from sequences like hex and control escapes.
|
|
565
591
|
- Fixed a bug where named groups with an empty name would return a blank token [].
|
|
566
592
|
- Fixed a bug where members of a parent set were being added to its last subset.
|
|
567
|
-
- Various code cleanups in scanner.rl
|
|
568
593
|
- Fixed a few mutable string bugs by calling dup on the originals.
|
|
569
594
|
- Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer to the latest (1.8.7 at this time)
|
|
570
595
|
- Removed look-behind assertions (positive and negative) from 1.8 syntax
|
data/README.md
CHANGED
|
@@ -9,8 +9,8 @@ A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
|
|
9
9
|
|
|
10
10
|
* Multilayered
|
|
11
11
|
* A scanner/tokenizer based on [Ragel](http://www.colm.net/open-source/ragel/)
|
|
12
|
-
* A lexer that produces a "stream" of
|
|
13
|
-
* A parser that produces a "tree" of Expression objects (OO API)
|
|
12
|
+
* A lexer that produces a "stream" of [Token objects](https://github.com/ammar/regexp_parser/wiki/Token-Objects)
|
|
13
|
+
* A parser that produces a "tree" of [Expression objects (OO API)](https://github.com/ammar/regexp_parser/wiki/Expression-Objects)
|
|
14
14
|
* Runs on Ruby 2.x, 3.x and JRuby runtimes
|
|
15
15
|
* Recognizes Ruby 1.8, 1.9, 2.x and 3.x regular expressions [See Supported Syntax](#supported-syntax)
|
|
16
16
|
|
|
@@ -36,14 +36,15 @@ Or, add it to your project's `Gemfile`:
|
|
|
36
36
|
|
|
37
37
|
```gem 'regexp_parser', '~> X.Y.Z'```
|
|
38
38
|
|
|
39
|
-
See
|
|
39
|
+
See the badge at the top of this README or [rubygems](https://rubygems.org/gems/regexp_parser)
|
|
40
|
+
for the latest version number.
|
|
40
41
|
|
|
41
42
|
|
|
42
43
|
---
|
|
43
44
|
## Usage
|
|
44
45
|
|
|
45
46
|
The three main modules are **Scanner**, **Lexer**, and **Parser**. Each of them
|
|
46
|
-
provides a single method that takes a regular expression (as a
|
|
47
|
+
provides a single method that takes a regular expression (as a Regexp object or
|
|
47
48
|
a string) and returns its results. The **Lexer** and the **Parser** accept an
|
|
48
49
|
optional second argument that specifies the syntax version, like 'ruby/2.0',
|
|
49
50
|
which defaults to the host Ruby version (using RUBY_VERSION).
|
|
@@ -79,7 +80,7 @@ All three methods accept either a `Regexp` or `String` (containing the pattern)
|
|
|
79
80
|
require 'regexp_parser'
|
|
80
81
|
|
|
81
82
|
Regexp::Parser.parse(
|
|
82
|
-
"a+ #
|
|
83
|
+
"a+ # Recognizes a and A...",
|
|
83
84
|
options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE
|
|
84
85
|
)
|
|
85
86
|
```
|
|
@@ -101,7 +102,7 @@ start/end offsets for each token found.
|
|
|
101
102
|
```ruby
|
|
102
103
|
require 'regexp_parser'
|
|
103
104
|
|
|
104
|
-
Regexp::Scanner.scan
|
|
105
|
+
Regexp::Scanner.scan(/(ab?(cd)*[e-h]+)/) do |type, token, text, ts, te|
|
|
105
106
|
puts "type: #{type}, token: #{token}, text: '#{text}' [#{ts}..#{te}]"
|
|
106
107
|
end
|
|
107
108
|
|
|
@@ -124,7 +125,7 @@ A one-liner that uses map on the result of the scan to return the textual
|
|
|
124
125
|
parts of the pattern:
|
|
125
126
|
|
|
126
127
|
```ruby
|
|
127
|
-
Regexp::Scanner.scan(
|
|
128
|
+
Regexp::Scanner.scan(/(cat?([bhm]at)){3,5}/).map { |token| token[2] }
|
|
128
129
|
#=> ["(", "cat", "?", "(", "[", "b", "h", "m", "]", "at", ")", ")", "{3,5}"]
|
|
129
130
|
```
|
|
130
131
|
|
|
@@ -220,7 +221,7 @@ syntax, and prints the token objects' text indented to their level.
|
|
|
220
221
|
```ruby
|
|
221
222
|
require 'regexp_parser'
|
|
222
223
|
|
|
223
|
-
Regexp::Lexer.lex
|
|
224
|
+
Regexp::Lexer.lex(/a?(b(c))*[d]+/, 'ruby/1.9') do |token|
|
|
224
225
|
puts "#{' ' * token.level}#{token.text}"
|
|
225
226
|
end
|
|
226
227
|
|
|
@@ -246,7 +247,7 @@ how the sequence 'cat' is treated. The 't' is separated because it's followed
|
|
|
246
247
|
by a quantifier that only applies to it.
|
|
247
248
|
|
|
248
249
|
```ruby
|
|
249
|
-
Regexp::Lexer.scan(
|
|
250
|
+
Regexp::Lexer.scan(/(cat?([b]at)){3,5}/).map { |token| token.text }
|
|
250
251
|
#=> ["(", "ca", "t", "?", "(", "[", "b", "]", "at", ")", ")", "{3,5}"]
|
|
251
252
|
```
|
|
252
253
|
|
|
@@ -274,7 +275,7 @@ require 'regexp_parser'
|
|
|
274
275
|
|
|
275
276
|
regex = /a?(b+(c)d)*(?<name>[0-9]+)/
|
|
276
277
|
|
|
277
|
-
tree = Regexp::Parser.parse(
|
|
278
|
+
tree = Regexp::Parser.parse(regex, 'ruby/2.1')
|
|
278
279
|
|
|
279
280
|
tree.traverse do |event, exp|
|
|
280
281
|
puts "#{event}: #{exp.type} `#{exp.to_s}`"
|
|
@@ -355,7 +356,7 @@ _Note that not all of these are available in all versions of Ruby_
|
|
|
355
356
|
|   _Nest Level_ | `\k<n-1>` | ✓ |
|
|
356
357
|
|   _Numbered_ | `\k<1>` | ✓ |
|
|
357
358
|
|   _Relative_ | `\k<-2>` | ✓ |
|
|
358
|
-
|   _Traditional_ | `\1`
|
|
359
|
+
|   _Traditional_ | `\1` through `\9` | ✓ |
|
|
359
360
|
|   _**Capturing**_ | `(abc)` | ✓ |
|
|
360
361
|
|   _**Comments**_ | `(?# comment text)` | ✓ |
|
|
361
362
|
|   _**Named**_ | `(?<name>abc)`, `(?'name'abc)` | ✓ |
|
|
@@ -375,7 +376,7 @@ _Note that not all of these are available in all versions of Ruby_
|
|
|
375
376
|
|   _**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
|
376
377
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
|
377
378
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
|
378
|
-
| **Unicode Properties** | _<sub>([Unicode 13.0.0]
|
|
379
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0])</sub>_ | ⋱ |
|
|
379
380
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
|
380
381
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
|
381
382
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
|
@@ -384,13 +385,17 @@ _Note that not all of these are available in all versions of Ruby_
|
|
|
384
385
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
|
385
386
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
|
386
387
|
|
|
387
|
-
|
|
388
|
-
|
|
388
|
+
[Unicode 13.0.0]: https://www.unicode.org/versions/Unicode13.0.0/
|
|
389
|
+
|
|
390
|
+
**\[1\]**: Ruby does not support lazy or possessive interval quantifiers.
|
|
391
|
+
Any `+` or `?` that follows an interval quantifier will be treated as another,
|
|
392
|
+
chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
|
|
389
393
|
[#69](https://github.com/ammar/regexp_parser/pull/69).
|
|
390
394
|
|
|
391
|
-
**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex
|
|
392
|
-
https://github.com/ruby/ruby/commit/
|
|
393
|
-
scanner and will only be emitted if a String or a Regexp
|
|
395
|
+
**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex
|
|
396
|
+
escapes when used in Regexp literals](https://github.com/ruby/ruby/commit/11ae581),
|
|
397
|
+
so they will only reach the scanner and will only be emitted if a String or a Regexp
|
|
398
|
+
that has been built with the `::new` constructor is scanned.
|
|
394
399
|
|
|
395
400
|
##### Inapplicable Features
|
|
396
401
|
|
|
@@ -407,25 +412,27 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
|
407
412
|
|
|
408
413
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
|
409
414
|
|
|
410
|
-
_**Note**: Attempting to process expressions with unsupported syntax features can raise
|
|
411
|
-
or incorrectly return tokens/objects as literals._
|
|
415
|
+
_**Note**: Attempting to process expressions with unsupported syntax features can raise
|
|
416
|
+
an error, or incorrectly return tokens/objects as literals._
|
|
412
417
|
|
|
413
418
|
|
|
414
419
|
## Testing
|
|
415
420
|
To run the tests simply run rake from the root directory.
|
|
416
421
|
|
|
417
|
-
The default task generates the scanner's code from the Ragel source files and runs
|
|
422
|
+
The default task generates the scanner's code from the Ragel source files and runs
|
|
423
|
+
all the specs, thus it requires Ragel to be installed.
|
|
418
424
|
|
|
419
|
-
Note that changes to Ragel files will not be reflected when running `rspec` on its own,
|
|
425
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own,
|
|
426
|
+
so to run individual tests you might want to run:
|
|
420
427
|
|
|
421
428
|
```
|
|
422
429
|
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
|
423
430
|
```
|
|
424
431
|
|
|
425
432
|
## Building
|
|
426
|
-
Building the scanner and the gem requires [Ragel](http://www.colm.net/open-source/ragel/)
|
|
427
|
-
installed. The build tasks will automatically invoke the 'ragel:rb' task to generate
|
|
428
|
-
Ruby scanner code.
|
|
433
|
+
Building the scanner and the gem requires [Ragel](http://www.colm.net/open-source/ragel/)
|
|
434
|
+
to be installed. The build tasks will automatically invoke the 'ragel:rb' task to generate
|
|
435
|
+
the Ruby scanner code.
|
|
429
436
|
|
|
430
437
|
|
|
431
438
|
The project uses the standard rubygems package tasks, so:
|
|
@@ -445,17 +452,26 @@ rake install
|
|
|
445
452
|
## Example Projects
|
|
446
453
|
Projects using regexp_parser.
|
|
447
454
|
|
|
448
|
-
- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool
|
|
455
|
+
- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool
|
|
456
|
+
that uses regexp_parser to convert Regexps to css/xpath selectors.
|
|
457
|
+
|
|
458
|
+
- [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions
|
|
459
|
+
to JavaScript-compatible regular expressions.
|
|
449
460
|
|
|
450
|
-
- [
|
|
461
|
+
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor
|
|
462
|
+
with alias support.
|
|
451
463
|
|
|
452
|
-
- [
|
|
464
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions
|
|
465
|
+
(amongst others) to see if your tests cover their behavior.
|
|
453
466
|
|
|
454
|
-
- [
|
|
467
|
+
- [repper](https://github.com/jaynetics/repper) is a regular expression
|
|
468
|
+
pretty-printer and formatter for Ruby.
|
|
455
469
|
|
|
456
|
-
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that
|
|
470
|
+
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that
|
|
471
|
+
uses regexp_parser to lint Regexps.
|
|
457
472
|
|
|
458
|
-
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper
|
|
473
|
+
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper
|
|
474
|
+
that uses regexp_parser to generate examples of postal codes.
|
|
459
475
|
|
|
460
476
|
|
|
461
477
|
## References
|
|
@@ -14,6 +14,10 @@ module Regexp::Expression
|
|
|
14
14
|
end
|
|
15
15
|
|
|
16
16
|
def to_re(format = :full)
|
|
17
|
+
if set_level > 0
|
|
18
|
+
warn "Calling #to_re on character set members is deprecated - "\
|
|
19
|
+
"their behavior might not be equivalent outside of the set."
|
|
20
|
+
end
|
|
17
21
|
::Regexp.new(to_s(format))
|
|
18
22
|
end
|
|
19
23
|
|
|
@@ -32,15 +36,19 @@ module Regexp::Expression
|
|
|
32
36
|
end
|
|
33
37
|
|
|
34
38
|
def repetitions
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
39
|
+
@repetitions ||=
|
|
40
|
+
if quantified?
|
|
41
|
+
min = quantifier.min
|
|
42
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
|
43
|
+
range = min..max
|
|
44
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
|
45
|
+
if RUBY_VERSION.to_f < 2.7
|
|
46
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
|
47
|
+
end
|
|
48
|
+
range
|
|
49
|
+
else
|
|
50
|
+
1..1
|
|
51
|
+
end
|
|
44
52
|
end
|
|
45
53
|
|
|
46
54
|
def greedy?
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
module Regexp::Expression
|
|
2
|
+
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
|
2
3
|
module Backreference
|
|
3
4
|
class Base < Regexp::Expression::Base
|
|
4
5
|
attr_accessor :referenced_expression
|
|
@@ -38,7 +39,7 @@ module Regexp::Expression
|
|
|
38
39
|
class NameCall < Backreference::Name; end
|
|
39
40
|
class NumberCallRelative < Backreference::NumberRelative; end
|
|
40
41
|
|
|
41
|
-
class NumberRecursionLevel < Backreference::
|
|
42
|
+
class NumberRecursionLevel < Backreference::NumberRelative
|
|
42
43
|
attr_reader :recursion_level
|
|
43
44
|
|
|
44
45
|
def initialize(token, options = {})
|
|
File without changes
|
|
@@ -33,6 +33,8 @@ module Regexp::Expression
|
|
|
33
33
|
|
|
34
34
|
class Absence < Group::Base; end
|
|
35
35
|
class Atomic < Group::Base; end
|
|
36
|
+
# TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
|
|
37
|
+
# longer inherit from Group because it is effectively a terminal expression.
|
|
36
38
|
class Options < Group::Base
|
|
37
39
|
attr_accessor :option_changes
|
|
38
40
|
|
|
@@ -40,6 +42,14 @@ module Regexp::Expression
|
|
|
40
42
|
self.option_changes = orig.option_changes.dup
|
|
41
43
|
super
|
|
42
44
|
end
|
|
45
|
+
|
|
46
|
+
def quantify(*args)
|
|
47
|
+
if token == :options_switch
|
|
48
|
+
raise Regexp::Parser::Error, 'Can not quantify an option switch'
|
|
49
|
+
else
|
|
50
|
+
super
|
|
51
|
+
end
|
|
52
|
+
end
|
|
43
53
|
end
|
|
44
54
|
|
|
45
55
|
class Capture < Group::Base
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
module Regexp::Expression
|
|
2
2
|
class Root < Regexp::Expression::Subexpression
|
|
3
3
|
def self.build(options = {})
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def self.build_token
|
|
8
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
|
6
|
+
construct(options: options)
|
|
9
7
|
end
|
|
10
8
|
end
|
|
11
9
|
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
module Regexp::Expression
|
|
2
|
+
module Shared
|
|
3
|
+
module ClassMethods
|
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
|
5
|
+
def construct(params = {})
|
|
6
|
+
attrs = construct_defaults.merge(params)
|
|
7
|
+
options = attrs.delete(:options)
|
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
|
9
|
+
token = Regexp::Token.new(*token_args)
|
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
|
11
|
+
|
|
12
|
+
new(token, options)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
def construct_defaults
|
|
16
|
+
if self == Root
|
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
|
18
|
+
elsif self < Sequence
|
|
19
|
+
{ type: :expression, token: :sequence }
|
|
20
|
+
else
|
|
21
|
+
{ type: token_class::Type }
|
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def token_class
|
|
26
|
+
if self == Root || self < Sequence
|
|
27
|
+
nil # no token class because these objects are Parser-generated
|
|
28
|
+
# TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
|
|
29
|
+
elsif self == Alternation || self == CharacterType::Any
|
|
30
|
+
Regexp::Syntax::Token::Meta
|
|
31
|
+
elsif self <= EscapeSequence::Base
|
|
32
|
+
Regexp::Syntax::Token::Escape
|
|
33
|
+
else
|
|
34
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def token_class
|
|
40
|
+
self.class.token_class
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
module Regexp::Expression
|
|
2
|
+
module Shared
|
|
3
|
+
# default implementation, e.g. "atomic group", "hex escape", "word type", ..
|
|
4
|
+
def human_name
|
|
5
|
+
[token, type].compact.join(' ').tr('_', ' ')
|
|
6
|
+
end
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
Alternation.class_eval { def human_name; 'alternation' end }
|
|
10
|
+
Alternative.class_eval { def human_name; 'alternative' end }
|
|
11
|
+
Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
|
|
12
|
+
Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
|
|
13
|
+
Anchor::EOL.class_eval { def human_name; 'end of line' end }
|
|
14
|
+
Anchor::EOS.class_eval { def human_name; 'end of string' end }
|
|
15
|
+
Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
|
|
16
|
+
Anchor::MatchStart.class_eval { def human_name; 'match start' end }
|
|
17
|
+
Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
|
|
18
|
+
Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
|
|
19
|
+
Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
|
|
20
|
+
Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
|
|
21
|
+
Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
|
|
22
|
+
Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
|
|
23
|
+
Backreference::Name.class_eval { def human_name; 'backreference by name' end }
|
|
24
|
+
Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
|
|
25
|
+
Backreference::Number.class_eval { def human_name; 'backreference' end }
|
|
26
|
+
Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
|
|
27
|
+
Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
|
|
28
|
+
Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
|
|
29
|
+
CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
|
|
30
|
+
CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
|
|
31
|
+
CharacterSet::Range.class_eval { def human_name; 'character range' end }
|
|
32
|
+
CharacterType::Any.class_eval { def human_name; 'match-all' end }
|
|
33
|
+
Comment.class_eval { def human_name; 'comment' end }
|
|
34
|
+
Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
|
|
35
|
+
Conditional::Condition.class_eval { def human_name; 'condition' end }
|
|
36
|
+
Conditional::Expression.class_eval { def human_name; 'conditional' end }
|
|
37
|
+
Group::Capture.class_eval { def human_name; "capture group #{number}" end }
|
|
38
|
+
Group::Named.class_eval { def human_name; 'named capture group' end }
|
|
39
|
+
Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
|
|
40
|
+
Literal.class_eval { def human_name; 'literal' end }
|
|
41
|
+
Root.class_eval { def human_name; 'root' end }
|
|
42
|
+
WhiteSpace.class_eval { def human_name; 'free space' end }
|
|
43
|
+
end
|
|
@@ -112,7 +112,7 @@ module Regexp::Expression
|
|
|
112
112
|
end
|
|
113
113
|
|
|
114
114
|
def inner_match_length
|
|
115
|
-
dummy = Regexp::Expression::Root.
|
|
115
|
+
dummy = Regexp::Expression::Root.construct
|
|
116
116
|
dummy.expressions = expressions.map(&:clone)
|
|
117
117
|
dummy.quantifier = quantifier && quantifier.clone
|
|
118
118
|
dummy.match_length
|
|
@@ -14,7 +14,7 @@ module Regexp::Expression
|
|
|
14
14
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
|
15
15
|
|
|
16
16
|
init_from_token_and_options(*args)
|
|
17
|
-
@mode = (token[/greedy|reluctant|possessive/] || :greedy).to_sym
|
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
|
18
18
|
@min, @max = minmax
|
|
19
19
|
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
|
20
20
|
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
|
@@ -44,10 +44,11 @@ module Regexp::Expression
|
|
|
44
44
|
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
|
45
45
|
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
|
46
46
|
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
|
47
|
-
"Please pass a Regexp::Token instead, e.g. replace `
|
|
48
|
-
"with `::Regexp::Token.new(:quantifier,
|
|
49
|
-
"will be derived automatically
|
|
50
|
-
"
|
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
|
49
|
+
"will be derived automatically.\n"\
|
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
|
51
52
|
@token = token
|
|
52
53
|
@text = text
|
|
53
54
|
@min = min
|
|
@@ -7,31 +7,17 @@ module Regexp::Expression
|
|
|
7
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
|
8
8
|
class Sequence < Regexp::Expression::Subexpression
|
|
9
9
|
class << self
|
|
10
|
-
def add_to(
|
|
11
|
-
sequence =
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
params[:conditional_level] ||
|
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
|
11
|
+
sequence = construct(
|
|
12
|
+
level: exp.level,
|
|
13
|
+
set_level: exp.set_level,
|
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
|
15
15
|
)
|
|
16
|
-
sequence.nesting_level =
|
|
16
|
+
sequence.nesting_level = exp.nesting_level + 1
|
|
17
17
|
sequence.options = active_opts
|
|
18
|
-
|
|
18
|
+
exp.expressions << sequence
|
|
19
19
|
sequence
|
|
20
20
|
end
|
|
21
|
-
|
|
22
|
-
def at_levels(level, set_level, conditional_level)
|
|
23
|
-
token = Regexp::Token.new(
|
|
24
|
-
:expression,
|
|
25
|
-
:sequence,
|
|
26
|
-
'',
|
|
27
|
-
nil, # ts
|
|
28
|
-
nil, # te
|
|
29
|
-
level,
|
|
30
|
-
set_level,
|
|
31
|
-
conditional_level
|
|
32
|
-
)
|
|
33
|
-
new(token)
|
|
34
|
-
end
|
|
35
21
|
end
|
|
36
22
|
|
|
37
23
|
def starts_at
|
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
module Regexp::Expression
|
|
2
2
|
module Shared
|
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
|
4
|
+
|
|
3
5
|
def self.included(mod)
|
|
4
6
|
mod.class_eval do
|
|
7
|
+
extend Shared::ClassMethods
|
|
8
|
+
|
|
5
9
|
attr_accessor :type, :token, :text, :ts, :te,
|
|
6
10
|
:level, :set_level, :conditional_level,
|
|
7
|
-
:options
|
|
11
|
+
:options
|
|
8
12
|
|
|
9
|
-
attr_reader :nesting_level
|
|
13
|
+
attr_reader :nesting_level, :quantifier
|
|
10
14
|
end
|
|
11
15
|
end
|
|
12
16
|
|
|
@@ -60,6 +64,10 @@ module Regexp::Expression
|
|
|
60
64
|
!quantifier.nil?
|
|
61
65
|
end
|
|
62
66
|
|
|
67
|
+
def optional?
|
|
68
|
+
quantified? && quantifier.min == 0
|
|
69
|
+
end
|
|
70
|
+
|
|
63
71
|
def offset
|
|
64
72
|
[starts_at, full_length]
|
|
65
73
|
end
|
|
@@ -77,5 +85,10 @@ module Regexp::Expression
|
|
|
77
85
|
quantifier && quantifier.nesting_level = lvl
|
|
78
86
|
terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
|
|
79
87
|
end
|
|
88
|
+
|
|
89
|
+
def quantifier=(qtf)
|
|
90
|
+
@quantifier = qtf
|
|
91
|
+
@repetitions = nil # clear memoized value
|
|
92
|
+
end
|
|
80
93
|
end
|
|
81
94
|
end
|
|
@@ -13,6 +13,7 @@ require 'regexp_parser/expression/classes/backreference'
|
|
|
13
13
|
require 'regexp_parser/expression/classes/character_set'
|
|
14
14
|
require 'regexp_parser/expression/classes/character_set/intersection'
|
|
15
15
|
require 'regexp_parser/expression/classes/character_set/range'
|
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
|
16
17
|
require 'regexp_parser/expression/classes/conditional'
|
|
17
18
|
require 'regexp_parser/expression/classes/escape_sequence'
|
|
18
19
|
require 'regexp_parser/expression/classes/free_space'
|
|
@@ -20,10 +21,11 @@ require 'regexp_parser/expression/classes/group'
|
|
|
20
21
|
require 'regexp_parser/expression/classes/keep'
|
|
21
22
|
require 'regexp_parser/expression/classes/literal'
|
|
22
23
|
require 'regexp_parser/expression/classes/posix_class'
|
|
23
|
-
require 'regexp_parser/expression/classes/property'
|
|
24
24
|
require 'regexp_parser/expression/classes/root'
|
|
25
|
-
require 'regexp_parser/expression/classes/type'
|
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
|
26
26
|
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
|
28
|
+
require 'regexp_parser/expression/methods/human_name'
|
|
27
29
|
require 'regexp_parser/expression/methods/match'
|
|
28
30
|
require 'regexp_parser/expression/methods/match_length'
|
|
29
31
|
require 'regexp_parser/expression/methods/options'
|
data/lib/regexp_parser/parser.rb
CHANGED
|
@@ -23,7 +23,7 @@ class Regexp::Parser
|
|
|
23
23
|
end
|
|
24
24
|
|
|
25
25
|
def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
|
26
|
-
root = Root.build(extract_options(input, options))
|
|
26
|
+
root = Root.construct(options: extract_options(input, options))
|
|
27
27
|
|
|
28
28
|
self.root = root
|
|
29
29
|
self.node = root
|
|
@@ -200,11 +200,11 @@ class Regexp::Parser
|
|
|
200
200
|
end
|
|
201
201
|
|
|
202
202
|
def captured_group_count_at_level
|
|
203
|
-
captured_group_counts[node
|
|
203
|
+
captured_group_counts[node]
|
|
204
204
|
end
|
|
205
205
|
|
|
206
206
|
def count_captured_group
|
|
207
|
-
captured_group_counts[node
|
|
207
|
+
captured_group_counts[node] += 1
|
|
208
208
|
end
|
|
209
209
|
|
|
210
210
|
def close_group
|
|
@@ -235,7 +235,15 @@ class Regexp::Parser
|
|
|
235
235
|
when :number, :number_ref
|
|
236
236
|
node << Backreference::Number.new(token, active_opts)
|
|
237
237
|
when :number_recursion_ref
|
|
238
|
-
node << Backreference::NumberRecursionLevel.new(token, active_opts)
|
|
238
|
+
node << Backreference::NumberRecursionLevel.new(token, active_opts).tap do |exp|
|
|
239
|
+
# TODO: should split off new token number_recursion_rel_ref and new
|
|
240
|
+
# class NumberRelativeRecursionLevel in v3.0.0 to get rid of this
|
|
241
|
+
if exp.text =~ /[<'][+-]/
|
|
242
|
+
assign_effective_number(exp)
|
|
243
|
+
else
|
|
244
|
+
exp.effective_number = exp.number
|
|
245
|
+
end
|
|
246
|
+
end
|
|
239
247
|
when :number_call
|
|
240
248
|
node << Backreference::NumberCall.new(token, active_opts)
|
|
241
249
|
when :number_rel_ref
|
|
@@ -254,6 +262,8 @@ class Regexp::Parser
|
|
|
254
262
|
def assign_effective_number(exp)
|
|
255
263
|
exp.effective_number =
|
|
256
264
|
exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
|
|
265
|
+
exp.effective_number > 0 ||
|
|
266
|
+
raise(ParserError, "Invalid reference: #{exp.reference}")
|
|
257
267
|
end
|
|
258
268
|
|
|
259
269
|
def conditional(token)
|
|
@@ -475,17 +485,14 @@ class Regexp::Parser
|
|
|
475
485
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
|
476
486
|
# rationale for this solution: https://github.com/ammar/regexp_parser/pull/69
|
|
477
487
|
if target_node.quantified?
|
|
478
|
-
|
|
479
|
-
:
|
|
480
|
-
:
|
|
481
|
-
|
|
482
|
-
target_node.
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
target_node.set_level,
|
|
486
|
-
target_node.conditional_level
|
|
488
|
+
new_group = Group::Passive.construct(
|
|
489
|
+
token: :passive,
|
|
490
|
+
ts: target_node.ts,
|
|
491
|
+
level: target_node.level,
|
|
492
|
+
set_level: target_node.set_level,
|
|
493
|
+
conditional_level: target_node.conditional_level,
|
|
494
|
+
options: active_opts,
|
|
487
495
|
)
|
|
488
|
-
new_group = Group::Passive.new(new_token, active_opts)
|
|
489
496
|
new_group.implicit = true
|
|
490
497
|
new_group << target_node
|
|
491
498
|
increase_group_level(target_node)
|
|
@@ -572,15 +579,17 @@ class Regexp::Parser
|
|
|
572
579
|
# an instance of Backreference::Number, its #referenced_expression is set to
|
|
573
580
|
# the instance of Group::Capture that it refers to via its number.
|
|
574
581
|
def assign_referenced_expressions
|
|
575
|
-
targets = {}
|
|
576
582
|
# find all referencable expressions
|
|
583
|
+
targets = { 0 => root }
|
|
577
584
|
root.each_expression do |exp|
|
|
578
585
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
|
579
586
|
end
|
|
580
587
|
# assign them to any refering expressions
|
|
581
588
|
root.each_expression do |exp|
|
|
582
|
-
exp.respond_to?(:reference)
|
|
583
|
-
|
|
589
|
+
next unless exp.respond_to?(:reference)
|
|
590
|
+
|
|
591
|
+
exp.referenced_expression = targets[exp.reference] ||
|
|
592
|
+
raise(ParserError, "Invalid reference: #{exp.reference}")
|
|
584
593
|
end
|
|
585
594
|
end
|
|
586
595
|
end # module Regexp::Parser
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: regexp_parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.4.0
|
|
4
|
+
version: 2.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ammar Ali
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2022-
|
|
11
|
+
date: 2022-09-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
|
|
14
14
|
email:
|
|
@@ -32,6 +32,7 @@ files:
|
|
|
32
32
|
- lib/regexp_parser/expression/classes/character_set.rb
|
|
33
33
|
- lib/regexp_parser/expression/classes/character_set/intersection.rb
|
|
34
34
|
- lib/regexp_parser/expression/classes/character_set/range.rb
|
|
35
|
+
- lib/regexp_parser/expression/classes/character_type.rb
|
|
35
36
|
- lib/regexp_parser/expression/classes/conditional.rb
|
|
36
37
|
- lib/regexp_parser/expression/classes/escape_sequence.rb
|
|
37
38
|
- lib/regexp_parser/expression/classes/free_space.rb
|
|
@@ -39,9 +40,10 @@ files:
|
|
|
39
40
|
- lib/regexp_parser/expression/classes/keep.rb
|
|
40
41
|
- lib/regexp_parser/expression/classes/literal.rb
|
|
41
42
|
- lib/regexp_parser/expression/classes/posix_class.rb
|
|
42
|
-
- lib/regexp_parser/expression/classes/property.rb
|
|
43
43
|
- lib/regexp_parser/expression/classes/root.rb
|
|
44
|
-
- lib/regexp_parser/expression/classes/type.rb
|
|
44
|
+
- lib/regexp_parser/expression/classes/unicode_property.rb
|
|
45
|
+
- lib/regexp_parser/expression/methods/construct.rb
|
|
46
|
+
- lib/regexp_parser/expression/methods/human_name.rb
|
|
45
47
|
- lib/regexp_parser/expression/methods/match.rb
|
|
46
48
|
- lib/regexp_parser/expression/methods/match_length.rb
|
|
47
49
|
- lib/regexp_parser/expression/methods/options.rb
|