regexp_parser 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +63 -6
- data/Gemfile +1 -0
- data/README.md +12 -6
- data/lib/regexp_parser/error.rb +1 -1
- data/lib/regexp_parser/expression/base.rb +9 -57
- data/lib/regexp_parser/expression/classes/backreference.rb +1 -0
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -2
- data/lib/regexp_parser/expression/classes/character_set.rb +2 -2
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -2
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -1
- data/lib/regexp_parser/expression/classes/group.rb +6 -6
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +3 -5
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +1 -0
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +10 -1
- data/lib/regexp_parser/expression/quantifier.rb +41 -23
- data/lib/regexp_parser/expression/sequence.rb +9 -23
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -2
- data/lib/regexp_parser/expression/shared.rb +85 -0
- data/lib/regexp_parser/expression/subexpression.rb +11 -7
- data/lib/regexp_parser/expression.rb +4 -2
- data/lib/regexp_parser/parser.rb +21 -72
- data/lib/regexp_parser/scanner/property.rl +1 -1
- data/lib/regexp_parser/scanner/scanner.rl +42 -31
- data/lib/regexp_parser/scanner.rb +725 -793
- data/lib/regexp_parser/syntax/token/escape.rb +1 -1
- data/lib/regexp_parser/syntax/token/unicode_property.rb +0 -5
- data/lib/regexp_parser/version.rb +1 -1
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f871ec3cdea5a594f72f5386f1b344710e6204f7307ba40d966653197f526be8
|
4
|
+
data.tar.gz: dd93c880f29ec77531faa2379fbfc8e34a9b67680664c6a3477d38afeaa1809a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 45e52ab0ce7bec3e4a275efa3828532778c49e8d36eec1ea82a43755a87abc9eee97e986027aa8f5c64fd604f15164d2ad4f37e5d6e22a5a1e3e9da6788271b9
|
7
|
+
data.tar.gz: 1f5514f3252294d9fe0877cff1d8b0db0400838c97ed78d15bbb794b94595c20d081681e4b1fe9bb6c89be7749514d8b2b8cf385360d002cd89e2a76ce6d2e63
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,62 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
### Added
|
4
|
+
|
5
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
6
|
+
|
7
|
+
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
8
|
+
|
9
|
+
### Fixed
|
10
|
+
|
11
|
+
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
12
|
+
- they used to be treated as reluctant or possessive mode indicators
|
13
|
+
- however, Ruby does not support these modes for interval quantifiers
|
14
|
+
- they are now treated as chained quantifiers instead, as Ruby does it
|
15
|
+
- cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
16
|
+
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
17
|
+
- e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
18
|
+
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
19
|
+
- they raise a `SyntaxError` when used in a Regexp, so they could only be passed as a String
|
20
|
+
- they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
21
|
+
|
22
|
+
### Added
|
23
|
+
|
24
|
+
- added `Expression::Base#==` for (deep) comparison of expressions
|
25
|
+
- added `Expression::Base#parts`
|
26
|
+
- returns the text elements and subexpressions of an expression
|
27
|
+
- e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
28
|
+
- added `Expression::Base#te` (a.k.a. token end index)
|
29
|
+
- `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
30
|
+
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
31
|
+
- `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
32
|
+
- `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
33
|
+
- `#conditional_level`, `#level`, `#nesting_level` , `#set_level`
|
34
|
+
- this allows a more unified handling with `Expression::Base` instances
|
35
|
+
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
36
|
+
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
37
|
+
|
38
|
+
Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
|
39
|
+
is deprecated.
|
40
|
+
|
41
|
+
It will no longer be supported in regexp_parser v3.0.0.
|
42
|
+
|
43
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
44
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
45
|
+
will be derived automatically.
|
46
|
+
|
47
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
48
|
+
|
49
|
+
This is consistent with how Expression::Base instances are created.
|
50
|
+
|
51
|
+
|
52
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
53
|
+
|
54
|
+
### Fixed
|
55
|
+
|
56
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
57
|
+
- these were never supported by Ruby or the `Regexp::Scanner`
|
58
|
+
- thanks to [Markus Schirp](https://github.com/mbj) for the report
|
59
|
+
|
3
60
|
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
61
|
|
5
62
|
### Added
|
@@ -180,7 +237,7 @@
|
|
180
237
|
|
181
238
|
### Added
|
182
239
|
|
183
|
-
- `Expression#each_expression` and `#traverse` can now be called without a block
|
240
|
+
- `Expression::Base#each_expression` and `#traverse` can now be called without a block
|
184
241
|
* this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
|
185
242
|
* thanks to [Masataka Kuwabara](https://github.com/pocke)
|
186
243
|
|
@@ -206,7 +263,7 @@
|
|
206
263
|
- Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
|
207
264
|
- Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
|
208
265
|
- Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
|
209
|
-
- Fixed `Expression#match` and `#=~` not working with a single argument
|
266
|
+
- Fixed `Expression::Base#match` and `#=~` not working with a single argument
|
210
267
|
|
211
268
|
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
212
269
|
|
@@ -214,15 +271,15 @@
|
|
214
271
|
|
215
272
|
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
216
273
|
* returns the `Group` expression that is being referenced via name or number
|
217
|
-
- Added `Expression#repetitions`
|
274
|
+
- Added `Expression::Base#repetitions`
|
218
275
|
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
219
276
|
* like `#quantity` but with a more uniform interface
|
220
|
-
- Added `Expression#match_length`
|
277
|
+
- Added `Expression::Base#match_length`
|
221
278
|
* allows inspecting and iterating over String lengths matched by the Expression
|
222
279
|
|
223
280
|
### Fixed
|
224
281
|
|
225
|
-
- Fixed `Expression#clone` "direction"
|
282
|
+
- Fixed `Expression::Base#clone` "direction"
|
226
283
|
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
227
284
|
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
228
285
|
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
@@ -384,7 +441,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
384
441
|
- Fixed a thread safety issue (issue #45)
|
385
442
|
- Some public class methods that were only reliable for
|
386
443
|
internal use are now private instance methods (PR #46)
|
387
|
-
- Improved the usefulness of Expression#options (issue #43) -
|
444
|
+
- Improved the usefulness of Expression::Base#options (issue #43) -
|
388
445
|
#options and derived methods such as #i?, #m? and #x? are now
|
389
446
|
defined for all Expressions that are affected by such flags.
|
390
447
|
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -367,12 +367,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
367
367
|
| **POSIX Classes** | `[:alpha:]`, `[:^digit:]` | ✓ |
|
368
368
|
| **Quantifiers** | | ⋱ |
|
369
369
|
|   _**Greedy**_ | `?`, `*`, `+`, `{m,M}` | ✓ |
|
370
|
-
|   _**Reluctant** (Lazy)_ | `??`, `*?`,
|
371
|
-
|   _**Possessive**_ | `?+`, `*+`,
|
370
|
+
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?` \[1\] | ✓ |
|
371
|
+
|   _**Possessive**_ | `?+`, `*+`, `++` \[1\] | ✓ |
|
372
372
|
| **String Escapes** | | ⋱ |
|
373
|
-
|   _**Control** \[
|
373
|
+
|   _**Control** \[2\]_ | `\C-C`, `\cD` | ✓ |
|
374
374
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
375
|
-
|   _**Meta** \[
|
375
|
+
|   _**Meta** \[2\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
376
376
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
377
377
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
378
378
|
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
@@ -384,7 +384,11 @@ _Note that not all of these are available in all versions of Ruby_
|
|
384
384
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
385
385
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
386
386
|
|
387
|
-
**\[1\]**:
|
387
|
+
**\[1\]**: Ruby does not support lazy or possessive interval quantifiers. Any `+` or `?` that follows an interval
|
388
|
+
quantifier will be treated as another, chained quantifier. See also [#3](https://github.com/ammar/regexp_parser/issues/3),
|
389
|
+
[#69](https://github.com/ammar/regexp_parser/pull/69).
|
390
|
+
|
391
|
+
**\[2\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
388
392
|
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
389
393
|
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
390
394
|
|
@@ -443,12 +447,14 @@ Projects using regexp_parser.
|
|
443
447
|
|
444
448
|
- [capybara](https://github.com/teamcapybara/capybara) is an integration testing tool that uses regexp_parser to convert Regexps to css/xpath selectors.
|
445
449
|
|
446
|
-
- [js_regex](https://github.com/
|
450
|
+
- [js_regex](https://github.com/jaynetics/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
447
451
|
|
448
452
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
449
453
|
|
450
454
|
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
451
455
|
|
456
|
+
- [repper](https://github.com/jaynetics/repper) is a regular expression pretty-printer for Ruby.
|
457
|
+
|
452
458
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
453
459
|
|
454
460
|
- [twitter-cldr-rb](https://github.com/twitter/twitter-cldr-rb) is a localization helper that uses regexp_parser to generate examples of postal codes.
|
data/lib/regexp_parser/error.rb
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Base
|
3
|
-
|
4
|
-
attr_accessor :text, :ts
|
5
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
-
|
7
|
-
attr_accessor :quantifier
|
8
|
-
attr_accessor :options
|
3
|
+
include Regexp::Expression::Shared
|
9
4
|
|
10
5
|
def initialize(token, options = {})
|
11
|
-
|
12
|
-
self.token = token.token
|
13
|
-
self.text = token.text
|
14
|
-
self.ts = token.ts
|
15
|
-
self.level = token.level
|
16
|
-
self.set_level = token.set_level
|
17
|
-
self.conditional_level = token.conditional_level
|
18
|
-
self.nesting_level = 0
|
19
|
-
self.quantifier = nil
|
20
|
-
self.options = options
|
6
|
+
init_from_token_and_options(token, options)
|
21
7
|
end
|
22
8
|
|
23
9
|
def initialize_copy(orig)
|
24
|
-
self.text =
|
25
|
-
self.options =
|
26
|
-
self.quantifier =
|
10
|
+
self.text = orig.text.dup if orig.text
|
11
|
+
self.options = orig.options.dup if orig.options
|
12
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
27
13
|
super
|
28
14
|
end
|
29
15
|
|
@@ -31,48 +17,14 @@ module Regexp::Expression
|
|
31
17
|
::Regexp.new(to_s(format))
|
32
18
|
end
|
33
19
|
|
34
|
-
|
35
|
-
|
36
|
-
def base_length
|
37
|
-
to_s(:base).length
|
38
|
-
end
|
39
|
-
|
40
|
-
def full_length
|
41
|
-
to_s.length
|
42
|
-
end
|
43
|
-
|
44
|
-
def offset
|
45
|
-
[starts_at, full_length]
|
46
|
-
end
|
47
|
-
|
48
|
-
def coded_offset
|
49
|
-
'@%d+%d' % offset
|
50
|
-
end
|
51
|
-
|
52
|
-
def to_s(format = :full)
|
53
|
-
"#{text}#{quantifier_affix(format)}"
|
54
|
-
end
|
55
|
-
|
56
|
-
def quantifier_affix(expression_format)
|
57
|
-
quantifier.to_s if quantified? && expression_format != :base
|
58
|
-
end
|
59
|
-
|
60
|
-
def terminal?
|
61
|
-
!respond_to?(:expressions)
|
62
|
-
end
|
63
|
-
|
64
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
20
|
+
def quantify(*args)
|
21
|
+
self.quantifier = Quantifier.new(*args)
|
66
22
|
end
|
67
23
|
|
68
24
|
def unquantified_clone
|
69
25
|
clone.tap { |exp| exp.quantifier = nil }
|
70
26
|
end
|
71
27
|
|
72
|
-
def quantified?
|
73
|
-
!quantifier.nil?
|
74
|
-
end
|
75
|
-
|
76
28
|
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
29
|
def quantity
|
78
30
|
return [nil,nil] unless quantified?
|
@@ -104,7 +56,7 @@ module Regexp::Expression
|
|
104
56
|
quantified? and quantifier.possessive?
|
105
57
|
end
|
106
58
|
|
107
|
-
def
|
59
|
+
def to_h
|
108
60
|
{
|
109
61
|
type: type,
|
110
62
|
token: token,
|
@@ -118,6 +70,6 @@ module Regexp::Expression
|
|
118
70
|
quantifier: quantified? ? quantifier.to_h : nil,
|
119
71
|
}
|
120
72
|
end
|
121
|
-
alias :
|
73
|
+
alias :attributes :to_h
|
122
74
|
end
|
123
75
|
end
|
@@ -20,8 +20,8 @@ module Regexp::Expression
|
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
"#{text}#{'^' if negated?}
|
23
|
+
def parts
|
24
|
+
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end # module Regexp::Expression
|
File without changes
|
@@ -55,8 +55,8 @@ module Regexp::Expression
|
|
55
55
|
condition.reference
|
56
56
|
end
|
57
57
|
|
58
|
-
def
|
59
|
-
|
58
|
+
def parts
|
59
|
+
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
60
|
end
|
61
61
|
|
62
62
|
def initialize_copy(orig)
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
|
4
|
+
def parts
|
5
|
+
[text.dup, *expressions, ')']
|
6
6
|
end
|
7
7
|
|
8
8
|
def capturing?; false end
|
@@ -18,9 +18,9 @@ module Regexp::Expression
|
|
18
18
|
super
|
19
19
|
end
|
20
20
|
|
21
|
-
def
|
21
|
+
def parts
|
22
22
|
if implicit?
|
23
|
-
|
23
|
+
expressions
|
24
24
|
else
|
25
25
|
super
|
26
26
|
end
|
@@ -65,8 +65,8 @@ module Regexp::Expression
|
|
65
65
|
end
|
66
66
|
|
67
67
|
class Comment < Group::Base
|
68
|
-
def
|
69
|
-
text.dup
|
68
|
+
def parts
|
69
|
+
[text.dup]
|
70
70
|
end
|
71
71
|
|
72
72
|
def comment?; true end
|
@@ -1,11 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Root < Regexp::Expression::Subexpression
|
3
3
|
def self.build(options = {})
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
def self.build_token
|
8
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
9
7
|
end
|
10
8
|
end
|
11
9
|
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp & token class names for alt., dot, escapes in v3.0.0
|
29
|
+
elsif self == Alternation || self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
elsif self <= EscapeSequence::Base
|
32
|
+
Regexp::Syntax::Token::Escape
|
33
|
+
else
|
34
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def token_class
|
40
|
+
self.class.token_class
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -112,7 +112,7 @@ module Regexp::Expression
|
|
112
112
|
end
|
113
113
|
|
114
114
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
115
|
+
dummy = Regexp::Expression::Root.construct
|
116
116
|
dummy.expressions = expressions.map(&:clone)
|
117
117
|
dummy.quantifier = quantifier && quantifier.clone
|
118
118
|
dummy.match_length
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,14 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
def ==(other)
|
99
|
+
other.class == self.class &&
|
100
|
+
other.to_s == to_s &&
|
101
|
+
other.options == options
|
102
|
+
end
|
103
|
+
alias :=== :==
|
104
|
+
alias :eql? :==
|
96
105
|
end
|
97
106
|
end
|
@@ -1,26 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
7
|
+
include Regexp::Expression::Shared
|
8
|
+
|
3
9
|
MODES = %i[greedy possessive reluctant]
|
4
10
|
|
5
|
-
attr_reader :
|
11
|
+
attr_reader :min, :max, :mode
|
6
12
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_copy(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
13
|
+
def initialize(*args)
|
14
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
15
|
|
20
|
-
|
21
|
-
|
16
|
+
init_from_token_and_options(*args)
|
17
|
+
@mode = (token.to_s[/greedy|reluctant|possessive/] || :greedy).to_sym
|
18
|
+
@min, @max = minmax
|
19
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
20
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
21
|
end
|
23
|
-
alias :to_str :to_s
|
24
22
|
|
25
23
|
def to_h
|
26
24
|
{
|
@@ -41,13 +39,33 @@ module Regexp::Expression
|
|
41
39
|
end
|
42
40
|
alias :lazy? :reluctant?
|
43
41
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def deprecated_old_init(token, text, min, max, mode = :greedy)
|
45
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
46
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
47
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
48
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
49
|
+
"will be derived automatically.\n"\
|
50
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
51
|
+
"This is consistent with how Expression::Base instances are created. "
|
52
|
+
@token = token
|
53
|
+
@text = text
|
54
|
+
@min = min
|
55
|
+
@max = max
|
56
|
+
@mode = mode
|
57
|
+
end
|
58
|
+
|
59
|
+
def minmax
|
60
|
+
case token
|
61
|
+
when /zero_or_one/ then [0, 1]
|
62
|
+
when /zero_or_more/ then [0, -1]
|
63
|
+
when /one_or_more/ then [1, -1]
|
64
|
+
when :interval
|
65
|
+
int_min = text[/\{(\d*)/, 1]
|
66
|
+
int_max = text[/,?(\d*)\}/, 1]
|
67
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
68
|
+
end
|
50
69
|
end
|
51
|
-
alias :eq :==
|
52
70
|
end
|
53
71
|
end
|
@@ -7,31 +7,17 @@ module Regexp::Expression
|
|
7
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
8
8
|
class Sequence < Regexp::Expression::Subexpression
|
9
9
|
class << self
|
10
|
-
def add_to(
|
11
|
-
sequence =
|
12
|
-
|
13
|
-
|
14
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
15
|
)
|
16
|
-
sequence.nesting_level =
|
16
|
+
sequence.nesting_level = exp.nesting_level + 1
|
17
17
|
sequence.options = active_opts
|
18
|
-
|
18
|
+
exp.expressions << sequence
|
19
19
|
sequence
|
20
20
|
end
|
21
|
-
|
22
|
-
def at_levels(level, set_level, conditional_level)
|
23
|
-
token = Regexp::Token.new(
|
24
|
-
:expression,
|
25
|
-
:sequence,
|
26
|
-
'',
|
27
|
-
nil, # ts
|
28
|
-
nil, # te
|
29
|
-
level,
|
30
|
-
set_level,
|
31
|
-
conditional_level
|
32
|
-
)
|
33
|
-
new(token)
|
34
|
-
end
|
35
21
|
end
|
36
22
|
|
37
23
|
def starts_at
|
@@ -39,12 +25,12 @@ module Regexp::Expression
|
|
39
25
|
end
|
40
26
|
alias :ts :starts_at
|
41
27
|
|
42
|
-
def quantify(
|
28
|
+
def quantify(*args)
|
43
29
|
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
30
|
target or raise Regexp::Parser::Error,
|
45
31
|
"No valid target found for '#{text}' quantifier"
|
46
32
|
|
47
|
-
target.quantify(
|
33
|
+
target.quantify(*args)
|
48
34
|
end
|
49
35
|
end
|
50
36
|
end
|