regexp_parser 2.3.0 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +63 -6
- data/Gemfile +1 -0
- data/README.md +12 -6
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +9 -57
- data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +3 -5
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +41 -23
- data/lib/regexp_parser/expression/sequence.rb +9 -23
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +85 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -7
- data/lib/regexp_parser/expression.rb +4 -2
- data/lib/regexp_parser/parser.rb +21 -72
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +42 -31
- data/lib/regexp_parser/scanner.rb +725 -793
- data/lib/regexp_parser/syntax/token/escape.rb +1 -1
- data/lib/regexp_parser/syntax/token/unicode_property.rb +0 -5
- data/lib/regexp_parser/version.rb +1 -1
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
|
4
|
+
data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
|
7
|
+
data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,62 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### Added
|
4
|
+
|
5
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
6
|
+
|
7
|
+
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
8
|
+
|
9
|
+
### Fixed
|
10
|
+
|
11
|
+
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
12
|
+
- they used to be treated as reluctant or possessive mode indicators
|
13
|
+
- however, Ruby does not support these modes for interval quantifiers
|
14
|
+
- they are now treated as chained quantifiers instead, as Ruby does it
|
15
|
+
- cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
16
|
+
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
17
|
+
- e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
18
|
+
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
19
|
+
- they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
|
20
|
+
- they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
21
|
+
|
22
|
+
### Added
|
23
|
+
|
24
|
+
- added `Expression::Base#==` for (deep) comparison of expressions
|
25
|
+
- added `Expression::Base#parts`
|
26
|
+
- returns the text elements and subexpressions of an expression
|
27
|
+
- e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
28
|
+
- added `Expression::Base#te` (a.k.a. token end index)
|
29
|
+
- `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
30
|
+
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
31
|
+
- `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
32
|
+
- `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
33
|
+
- `#conditional_level`, `#level`, `#nesting_level` , `#set_level`
|
34
|
+
- this allows a more unified handling with `Expression::Base` instances
|
35
|
+
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
36
|
+
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
37
|
+
|
38
|
+
Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
|
39
|
+
is deprecated.
|
40
|
+
|
41
|
+
It will no longer be supported in regexp_parser v3.0.0.
|
42
|
+
|
43
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
44
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
45
|
+
will be derived automatically.
|
46
|
+
|
47
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
48
|
+
|
49
|
+
This is consistent with how Expression::Base instances are created.
|
50
|
+
|
51
|
+
|
52
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
53
|
+
|
54
|
+
### Fixed
|
55
|
+
|
56
|
+
- removed five inexistent unicode properties from `Syntax#features`
|
57
|
+
- these were never supported by Ruby or the `Regexp::Scanner`
|
58
|
+
- thanks to [Markus Schirp](https://github.com/mbj) for the report
|
59
|
+
|
3
60
|
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
61
|
|
5
62
|
### Added
|
@@ -180,7 +237,7 @@
|
|
180
237
|
|
181
238
|
### Added
|
182
239
|
|
183
|
-
- `Expression#each_expression` and `#traverse` can now be called without a block
|
240
|
+
- `Expression::Base#each_expression` and `#traverse` can now be called without a block
|
184
241
|
* this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
|
185
242
|
* thanks to [Masataka Kuwabara](https://github.com/pocke)
|
186
243
|
|
@@ -206,7 +263,7 @@
|
|
206
263
|
- Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
|
207
264
|
- Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
|
208
265
|
- Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
|
209
|
-
- Fixed `Expression#match` and `#=~` not working with a single argument
|
266
|
+
- Fixed `Expression::Base#match` and `#=~` not working with a single argument
|
210
267
|
|
211
268
|
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
212
269
|
|
@@ -214,15 +271,15 @@
|
|
214
271
|
|
215
272
|
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
216
273
|
* returns the `Group` expression that is being referenced via name or number
|
217
|
-
- Added `Expression#repetitions`
|
274
|
+
- Added `Expression::Base#repetitions`
|
218
275
|
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
219
276
|
* like `#quantity` but with a more uniform interface
|
220
|
-
- Added `Expression#match_length`
|
277
|
+
- Added `Expression::Base#match_length`
|
221
278
|
* allows inspecting and iterating over the String lengths matched by the Expression
|
222
279
|
|
223
280
|
### Fixed
|
224
281
|
|
225
|
-
- Fixed `Expression#clone` "direction"
|
282
|
+
- Fixed `Expression::Base#clone` "direction"
|
226
283
|
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
227
284
|
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
228
285
|
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
@@ -384,7 +441,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
384
441
|
- Fixed a thread safety issue (issue #45)
|
385
442
|
- Some public class methods that were only reliable for
|
386
443
|
internal use are now private instance methods (PR #46)
|
387
|
-
- Improved the usefulness of Expression#options (issue #43) -
|
444
|
+
- Improved the usefulness of Expression::Base#options (issue #43) -
|
388
445
|
#options and derived methods such as #i?, #m? and #x? are now
|
389
446
|
defined for all Expressions that are affected by such flags.
|
390
447
|
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -367,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
367
367
|
| **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | ✓ |
|
368
368
|
| **Quantifiers** | | ⋱ |
|
369
369
|
|   _**Greedy**_ | `?`, `*`, `+`, `{m,M}` | ✓ |
|
370
|
-
|   _**Reluctant** (Lazy)_ | `??`, `*?`,
|
371
|
-
|   _**Possessive**_ | `?+`, `*+`,
|
370
|
+
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | ✓ |
|
371
|
+
|   _**Possessive**_ | `?+`, `*+`, `++` \[1\] | ✓ |
|
372
372
|
| **String Escapes** | | ⋱ |
|
373
|
-
|   _**Control** \[
|
373
|
+
|   _**Control** \[2\]_ | `\C-C`, `\cD` | ✓ |
|
374
374
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
375
|
-
|   _**Meta** \[
|
375
|
+
|   _**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
376
376
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
377
377
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
378
378
|
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
@@ -384,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
|
|
384
384
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
385
385
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
386
386
|
|
387
|
-
**\[1\]**:
|
387
|
+
**\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
|
388
|
+
quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
|
389
|
+
[#69](https://github.com/ammar/regexp_parser/pull/69).
|
390
|
+
|
391
|
+
**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
388
392
|
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
389
393
|
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
390
394
|
|
@@ -443,12 +447,14 @@ Projects using regexp_parser.
|
|
443
447
|
|
444
448
|
- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
|
445
449
|
|
446
|
-
- [js_regex](https://github.com/
|
450
|
+
- [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
447
451
|
|
448
452
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
449
453
|
|
450
454
|
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
451
455
|
|
456
|
+
- [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
|
457
|
+
|
452
458
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
453
459
|
|
454
460
|
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
|
data/lib/regexp_parser/error.rb
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Base
|
3
|
-
|
4
|
-
attr_accessor :text, :ts
|
5
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
-
|
7
|
-
attr_accessor :quantifier
|
8
|
-
attr_accessor :options
|
3
|
+
include Regexp::Expression::Shared
|
9
4
|
|
10
5
|
def initialize(token, options = {})
|
11
|
-
|
12
|
-
self.token = token.token
|
13
|
-
self.text = token.text
|
14
|
-
self.ts = token.ts
|
15
|
-
self.level = token.level
|
16
|
-
self.set_level = token.set_level
|
17
|
-
self.conditional_level = token.conditional_level
|
18
|
-
self.nesting_level = 0
|
19
|
-
self.quantifier = nil
|
20
|
-
self.options = options
|
6
|
+
init_from_token_and_options(token, options)
|
21
7
|
end
|
22
8
|
|
23
9
|
def initialize_copy(orig)
|
24
|
-
self.text =
|
25
|
-
self.options =
|
26
|
-
self.quantifier =
|
10
|
+
self.text = orig.text.dup if orig.text
|
11
|
+
self.options = orig.options.dup if orig.options
|
12
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
27
13
|
super
|
28
14
|
end
|
29
15
|
|
@@ -31,48 +17,14 @@ module Regexp::Expression
|
|
31
17
|
::Regexp.new(to_s(format))
|
32
18
|
end
|
33
19
|
|
34
|
-
|
35
|
-
|
36
|
-
def base_length
|
37
|
-
to_s(:base).length
|
38
|
-
end
|
39
|
-
|
40
|
-
def full_length
|
41
|
-
to_s.length
|
42
|
-
end
|
43
|
-
|
44
|
-
def offset
|
45
|
-
[starts_at, full_length]
|
46
|
-
end
|
47
|
-
|
48
|
-
def coded_offset
|
49
|
-
'@%d+%d' % offset
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_s(format = :full)
|
53
|
-
"#{text}#{quantifier_affix(format)}"
|
54
|
-
end
|
55
|
-
|
56
|
-
def quantifier_affix(expression_format)
|
57
|
-
quantifier.to_s if quantified? && expression_format != :base
|
58
|
-
end
|
59
|
-
|
60
|
-
def terminal?
|
61
|
-
!respond_to?(:expressions)
|
62
|
-
end
|
63
|
-
|
64
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
20
|
+
def quantify(*args)
|
21
|
+
self.quantifier = Quantifier.new(*args)
|
66
22
|
end
|
67
23
|
|
68
24
|
def unquantified_clone
|
69
25
|
clone.tap { |exp| exp.quantifier = nil }
|
70
26
|
end
|
71
27
|
|
72
|
-
def quantified?
|
73
|
-
!quantifier.nil?
|
74
|
-
end
|
75
|
-
|
76
28
|
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
29
|
def quantity
|
78
30
|
return [nil,nil] unless quantified?
|
@@ -104,7 +56,7 @@ module Regexp::Expression
|
|
104
56
|
quantified? and quantifier.possessive?
|
105
57
|
end
|
106
58
|
|
107
|
-
def
|
59
|
+
def to_h
|
108
60
|
{
|
109
61
|
type: type,
|
110
62
|
token: token,
|
@@ -118,6 +70,6 @@ module Regexp::Expression
|
|
118
70
|
quantifier: quantified? ? quantifier.to_h : nil,
|
119
71
|
}
|
120
72
|
end
|
121
|
-
alias :
|
73
|
+
alias :attributes :to_h
|
122
74
|
end
|
123
75
|
end
|
@@ -20,8 +20,8 @@ module Regexp::Expression
|
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
"#{text}#{'^' if negated?}
|
23
|
+
def parts
|
24
|
+
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end # module Regexp::Expression
|
File without changes
|
@@ -55,8 +55,8 @@ module Regexp::Expression
|
|
55
55
|
condition.reference
|
56
56
|
end
|
57
57
|
|
58
|
-
def
|
59
|
-
|
58
|
+
def parts
|
59
|
+
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
60
|
end
|
61
61
|
|
62
62
|
def initialize_copy(orig)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
|
4
|
+
def parts
|
5
|
+
[text.dup, *expressions, ')']
|
6
6
|
end
|
7
7
|
|
8
8
|
def capturing?; false end
|
@@ -18,9 +18,9 @@ module Regexp::Expression
|
|
18
18
|
super
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
21
|
+
def parts
|
22
22
|
if implicit?
|
23
|
-
|
23
|
+
expressions
|
24
24
|
else
|
25
25
|
super
|
26
26
|
end
|
@@ -65,8 +65,8 @@ module Regexp::Expression
|
|
65
65
|
end
|
66
66
|
|
67
67
|
class Comment < Group::Base
|
68
|
-
def
|
69
|
-
text.dup
|
68
|
+
def parts
|
69
|
+
[text.dup]
|
70
70
|
end
|
71
71
|
|
72
72
|
def comment?; true end
|
@@ -1,11 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Root < Regexp::Expression::Subexpression
|
3
3
|
def self.build(options = {})
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def self.build_token
|
8
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
9
7
|
end
|
10
8
|
end
|
11
9
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
|
29
|
+
elsif self == Alternation || self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
elsif self <= EscapeSequence::Base
|
32
|
+
Regexp::Syntax::Token::Escape
|
33
|
+
else
|
34
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def token_class
|
40
|
+
self.class.token_class
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -112,7 +112,7 @@ module Regexp::Expression
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
115
|
+
dummy = Regexp::Expression::Root.construct
|
116
116
|
dummy.expressions = expressions.map(&:clone)
|
117
117
|
dummy.quantifier = quantifier && quantifier.clone
|
118
118
|
dummy.match_length
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,14 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
def ==(other)
|
99
|
+
other.class == self.class &&
|
100
|
+
other.to_s == to_s &&
|
101
|
+
other.options == options
|
102
|
+
end
|
103
|
+
alias :=== :==
|
104
|
+
alias :eql? :==
|
96
105
|
end
|
97
106
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
7
|
+
include Regexp::Expression::Shared
|
8
|
+
|
3
9
|
MODES = %i[greedy possessive reluctant]
|
4
10
|
|
5
|
-
attr_reader :
|
11
|
+
attr_reader :min, :max, :mode
|
6
12
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_copy(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
13
|
+
def initialize(*args)
|
14
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
15
|
|
20
|
-
|
21
|
-
|
16
|
+
init_from_token_and_options(*args)
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
+
@min, @max = minmax
|
19
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
21
|
end
|
23
|
-
alias :to_str :to_s
|
24
22
|
|
25
23
|
def to_h
|
26
24
|
{
|
@@ -41,13 +39,33 @@ module Regexp::Expression
|
|
41
39
|
end
|
42
40
|
alias :lazy? :reluctant?
|
43
41
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically.\n"\
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
52
|
+
@token = token
|
53
|
+
@text = text
|
54
|
+
@min = min
|
55
|
+
@max = max
|
56
|
+
@mode = mode
|
57
|
+
end
|
58
|
+
|
59
|
+
def minmax
|
60
|
+
case token
|
61
|
+
when /zero_or_one/ then [0, 1]
|
62
|
+
when /zero_or_more/ then [0, -1]
|
63
|
+
when /one_or_more/ then [1, -1]
|
64
|
+
when :interval
|
65
|
+
int_min = text[/\{(\d*)/, 1]
|
66
|
+
int_max = text[/,?(\d*)\}/, 1]
|
67
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
68
|
+
end
|
50
69
|
end
|
51
|
-
alias :eq :==
|
52
70
|
end
|
53
71
|
end
|
@@ -7,31 +7,17 @@ module Regexp::Expression
|
|
7
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
8
8
|
class Sequence < Regexp::Expression::Subexpression
|
9
9
|
class << self
|
10
|
-
def add_to(
|
11
|
-
sequence =
|
12
|
-
|
13
|
-
|
14
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
15
|
)
|
16
|
-
sequence.nesting_level =
|
16
|
+
sequence.nesting_level = exp.nesting_level + 1
|
17
17
|
sequence.options = active_opts
|
18
|
-
|
18
|
+
exp.expressions << sequence
|
19
19
|
sequence
|
20
20
|
end
|
21
|
-
|
22
|
-
def at_levels(level, set_level, conditional_level)
|
23
|
-
token = Regexp::Token.new(
|
24
|
-
:expression,
|
25
|
-
:sequence,
|
26
|
-
'',
|
27
|
-
nil, # ts
|
28
|
-
nil, # te
|
29
|
-
level,
|
30
|
-
set_level,
|
31
|
-
conditional_level
|
32
|
-
)
|
33
|
-
new(token)
|
34
|
-
end
|
35
21
|
end
|
36
22
|
|
37
23
|
def starts_at
|
@@ -39,12 +25,12 @@ module Regexp::Expression
|
|
39
25
|
end
|
40
26
|
alias :ts :starts_at
|
41
27
|
|
42
|
-
def quantify(
|
28
|
+
def quantify(*args)
|
43
29
|
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
30
|
target or raise Regexp::Parser::Error,
|
45
31
|
"No valid target found for '#{text}' quantifier"
|
46
32
|
|
47
|
-
target.quantify(
|
33
|
+
target.quantify(*args)
|
48
34
|
end
|
49
35
|
end
|
50
36
|
end
|