regexp_parser 1.0.0 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -30
- data/README.md +29 -29
- data/lib/regexp_parser/expression.rb +5 -4
- data/lib/regexp_parser/expression/classes/conditional.rb +27 -23
- data/lib/regexp_parser/expression/classes/group.rb +3 -1
- data/lib/regexp_parser/expression/classes/root.rb +17 -5
- data/lib/regexp_parser/expression/methods/traverse.rb +0 -1
- data/lib/regexp_parser/expression/quantifier.rb +11 -2
- data/lib/regexp_parser/expression/sequence.rb +37 -13
- data/lib/regexp_parser/expression/sequence_operation.rb +1 -6
- data/lib/regexp_parser/expression/subexpression.rb +15 -4
- data/lib/regexp_parser/parser.rb +19 -10
- data/lib/regexp_parser/scanner.rb +8 -8
- data/lib/regexp_parser/scanner/scanner.rl +8 -8
- data/lib/regexp_parser/version.rb +1 -1
- data/test/expression/test_subexpression.rb +9 -0
- data/test/lexer/test_refcalls.rb +3 -0
- data/test/parser/test_all.rb +3 -3
- data/test/parser/test_conditionals.rb +47 -10
- data/test/parser/test_groups.rb +22 -0
- data/test/parser/test_quantifiers.rb +43 -1
- data/test/parser/test_refcalls.rb +36 -20
- data/test/scanner/test_all.rb +4 -4
- data/test/scanner/test_refcalls.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20ba21704667276107a1041b3bb5943bbbec0078f706cf0d7db85110631dfe8d
|
4
|
+
data.tar.gz: 87886f6cad480ebc62f3e1f243d9b61170097e5419fc8b3972cd3348e5d8d7e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0678640973741b2ea63053c058809fa075b3b465756bddee9a1914f67f7181a3681d3592662d4eadf5a60e844c550950b371577239924c4d3ce7f07f9fdfefa6'
|
7
|
+
data.tar.gz: 3bf18d0d7989c1f9eef010d1579ac78537c6c083c9b7c7c2f0cda094c0f973e1fdcc17c5992ae35d823720d2cdb10a60424876e08bd4b2b60b125c8b107a62bf
|
data/CHANGELOG.md
CHANGED
@@ -1,54 +1,84 @@
|
|
1
|
-
## [
|
1
|
+
## [1.2.0] - 2018-09-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
- `Subexpression` (branch node) includes `Enumerable`, allowing you to `#select` children etc.
|
6
|
+
|
7
|
+
### Fixed
|
8
|
+
|
9
|
+
- Fixed missing quantifier in `Conditional::Expression` methods `#to_s`, `#to_re`
|
10
|
+
- `Conditional::Condition` no longer lives outside the recursive `#expressions` tree
|
11
|
+
- it used to be the only expression stored in a custom ivar, complicating traversal
|
12
|
+
- its setter and getter (`#condition=`, `#condition`) still work as before
|
13
|
+
|
14
|
+
## [1.1.0] - 2018-09-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
15
|
+
|
16
|
+
### Added
|
17
|
+
|
18
|
+
- Added `Quantifier` methods `#greedy?`, `#possessive?`, `#reluctant?`/`#lazy?`
|
19
|
+
- Added `Group::Options#option_changes`
|
20
|
+
- shows the options enabled or disabled by the given options group
|
21
|
+
- as with all other expressions, `#options` shows the overall active options
|
22
|
+
- Added `Conditional#reference` and `Condition#reference`, indicating the determinative group
|
23
|
+
- Added `Subexpression#dig`, acts like [`Array#dig`](http://ruby-doc.org/core-2.5.0/Array.html#method-i-dig)
|
24
|
+
|
25
|
+
### Fixed
|
26
|
+
|
27
|
+
- Fixed parsing of quantified conditional expressions (quantifiers were assigned to the wrong expression)
|
28
|
+
- Fixed scanning and parsing of forward-referring subexpression calls (e.g. `\g<+1>`)
|
29
|
+
- `Root` and `Sequence` expressions now support the same constructor signature as all other expressions
|
30
|
+
|
31
|
+
## [1.0.0] - 2018-09-01 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
32
|
|
3
33
|
This release includes several breaking changes, mostly to character sets, #map and properties.
|
4
34
|
|
5
35
|
### Changed
|
6
36
|
|
7
37
|
- Changed handling of sets (a.k.a. character classes or "bracket expressions")
|
8
|
-
* see PR #55 / issue #47 for details
|
38
|
+
* see PR [#55](https://github.com/ammar/regexp_parser/pull/55) / issue [#47](https://github.com/ammar/regexp_parser/issues/47) for details
|
9
39
|
* sets are now parsed to expression trees like other nestable expressions
|
10
|
-
*
|
11
|
-
* CharacterSet#members has been removed
|
12
|
-
* new Range and Intersection classes represent corresponding syntax features
|
13
|
-
* a new PosixClass expression class represents e.g. [[:ascii:]]
|
14
|
-
* PosixClass instances behave like Property ones, e.g. support
|
15
|
-
*
|
16
|
-
- Changed Subexpression#map to act like regular Enumerable#map
|
17
|
-
* the old behavior is available as Subexpression#flat_map
|
18
|
-
* e.g. parse(/[a]/).map(&:to_s) == ["[a]"]
|
19
|
-
- Changed
|
20
|
-
* EscapeSequence::Codepoint
|
21
|
-
* they already existed, but were all parsed as EscapeSequence::Literal
|
22
|
-
* e.g.
|
23
|
-
- Changed naming of many property tokens (emitted for
|
24
|
-
* if you work with these tokens, see PR #56 for details
|
25
|
-
* e.g.
|
26
|
-
- Changed (?m) and the likes to emit as
|
27
|
-
* allows differentiating from group-local
|
28
|
-
- Changed name of Backreference::..NestLevel to
|
29
|
-
- Changed
|
40
|
+
* `#scan` now emits the same tokens as outside sets (no longer `:set, :member`)
|
41
|
+
* `CharacterSet#members` has been removed
|
42
|
+
* new `Range` and `Intersection` classes represent corresponding syntax features
|
43
|
+
* a new `PosixClass` expression class represents e.g. `[[:ascii:]]`
|
44
|
+
* `PosixClass` instances behave like `Property` ones, e.g. support `#negative?`
|
45
|
+
* `#scan` emits `:(non)posixclass, :<type>` instead of `:set, :char_(non)<type>`
|
46
|
+
- Changed `Subexpression#map` to act like regular `Enumerable#map`
|
47
|
+
* the old behavior is available as `Subexpression#flat_map`
|
48
|
+
* e.g. `parse(/[a]/).map(&:to_s) == ["[a]"]`; used to be `["[a]", "a"]`
|
49
|
+
- Changed expression emissions for some escape sequences
|
50
|
+
* `EscapeSequence::Codepoint`, `CodepointList`, `Hex` and `Octal` are now all used
|
51
|
+
* they already existed, but were all parsed as `EscapeSequence::Literal`
|
52
|
+
* e.g. `\x97` is now `EscapeSequence::Hex` instead of `EscapeSequence::Literal`
|
53
|
+
- Changed naming of many property tokens (emitted for `\p{...}`)
|
54
|
+
* if you work with these tokens, see PR [#56](https://github.com/ammar/regexp_parser/pull/56) for details
|
55
|
+
* e.g. `:punct_dash` is now `:dash_punctuation`
|
56
|
+
- Changed `(?m)` and the likes to emit as `:options_switch` token (@4ade4d1)
|
57
|
+
* allows differentiating from group-local `:options`, e.g. `(?m:.)`
|
58
|
+
- Changed name of `Backreference::..NestLevel` to `..RecursionLevel` (@4184339)
|
59
|
+
- Changed `Backreference::Number#number` from `String` to `Integer` (@40a2231)
|
30
60
|
|
31
61
|
### Added
|
32
62
|
|
33
63
|
- Added support for all previously missing properties (about 250)
|
34
|
-
- Added Expression::UnicodeProperty#shortcut (e.g. returns "m" for
|
35
|
-
- Added
|
36
|
-
- Added
|
37
|
-
- Added
|
64
|
+
- Added `Expression::UnicodeProperty#shortcut` (e.g. returns "m" for `\p{mark}`)
|
65
|
+
- Added `#char(s)` and `#codepoint(s)` methods to all `EscapeSequence` expressions
|
66
|
+
- Added `#number`/`#name`/`#recursion_level` to all backref/call expressions (@174bf21)
|
67
|
+
- Added `#number` and `#number_at_level` to capturing group expressions (@40a2231)
|
38
68
|
|
39
69
|
### Fixed
|
40
70
|
|
41
|
-
- Fixed
|
71
|
+
- Fixed Ruby version mapping of some properties
|
42
72
|
- Fixed scanning of some property spellings, e.g. with dashes
|
43
73
|
- Fixed some incorrect property alias normalizations
|
44
|
-
- Fixed scanning of codepoint escapes with 6 digits (e.g.
|
45
|
-
- Fixed scanning of
|
74
|
+
- Fixed scanning of codepoint escapes with 6 digits (e.g. `\u{10FFFF}`)
|
75
|
+
- Fixed scanning of `\R` and `\X` within sets; they act as literals there
|
46
76
|
|
47
77
|
## [0.5.0] - 2018-04-29 - [Janosch Müller](mailto:janosch84@gmail.com)
|
48
78
|
|
49
79
|
### Changed
|
50
80
|
|
51
|
-
- Changed handling of Ruby versions (PR #53)
|
81
|
+
- Changed handling of Ruby versions (PR [#53](https://github.com/ammar/regexp_parser/pull/53))
|
52
82
|
* New Ruby versions are now supported by default
|
53
83
|
* Some deep-lying APIs have changed, which should not affect most users:
|
54
84
|
* `Regexp::Syntax::VERSIONS` is gone
|
data/README.md
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://secure.travis-ci.org/ammar/regexp_parser.svg?branch=master)](http://travis-ci.org/ammar/regexp_parser) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
4
|
|
5
|
-
A
|
5
|
+
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
6
|
|
7
7
|
* Multilayered
|
8
|
-
* A scanner/tokenizer based on [
|
8
|
+
* A scanner/tokenizer based on [Ragel](http://www.colm.net/open-source/ragel/)
|
9
9
|
* A lexer that produces a "stream" of token objects.
|
10
10
|
* A parser that produces a "tree" of Expression objects (OO API)
|
11
|
-
* Runs on
|
12
|
-
* Recognizes
|
11
|
+
* Runs on Ruby 1.9, 2.x, and JRuby (1.9 mode) runtimes.
|
12
|
+
* Recognizes Ruby 1.8, 1.9, and 2.x regular expressions [See Supported Syntax](#supported-syntax)
|
13
13
|
|
14
14
|
|
15
15
|
_For examples of regexp_parser in use, see [Example Projects](#example-projects)._
|
@@ -46,7 +46,7 @@ The three main modules are **Scanner**, **Lexer**, and **Parser**. Each of them
|
|
46
46
|
provides a single method that takes a regular expression (as a Regexp object or
|
47
47
|
a string) and returns its results. The **Lexer** and the **Parser** accept an
|
48
48
|
optional second argument that specifies the syntax version, like 'ruby/2.0',
|
49
|
-
which defaults to the host
|
49
|
+
which defaults to the host Ruby version (using RUBY_VERSION).
|
50
50
|
|
51
51
|
Here are the basic usage examples:
|
52
52
|
|
@@ -77,7 +77,7 @@ called with the results as follows:
|
|
77
77
|
## Components
|
78
78
|
|
79
79
|
### Scanner
|
80
|
-
A
|
80
|
+
A Ragel-generated scanner that recognizes the cumulative syntax of all
|
81
81
|
supported syntax versions. It breaks a given expression's text into the
|
82
82
|
smallest parts, and identifies their type, token, text, and start/end
|
83
83
|
offsets within the pattern.
|
@@ -123,7 +123,7 @@ Regexp::Scanner.scan( /(cat?([bhm]at)){3,5}/ ).map {|token| token[2]}
|
|
123
123
|
balancing punctuation and premature end of pattern. Flavor validity checks
|
124
124
|
are performed in the lexer, which uses a syntax object.
|
125
125
|
|
126
|
-
* If the input is a
|
126
|
+
* If the input is a Ruby **Regexp** object, the scanner calls #source on it to
|
127
127
|
get its string representation. #source does not include the options of
|
128
128
|
the expression (m, i, and x). To include the options in the scan, #to_s
|
129
129
|
should be called on the **Regexp** before passing it to the scanner or the
|
@@ -188,7 +188,7 @@ ruby_18.implements? :conditional, :condition # => false
|
|
188
188
|
Sits on top of the scanner and performs lexical analysis on the tokens that
|
189
189
|
it emits. Among its tasks are: breaking quantified literal runs, collecting the
|
190
190
|
emitted token attributes into Token objects, calculating their nesting depth,
|
191
|
-
normalizing tokens for the parser, and
|
191
|
+
normalizing tokens for the parser, and checking if the tokens are implemented by
|
192
192
|
the given syntax version.
|
193
193
|
|
194
194
|
See the [Token Objects](https://github.com/ammar/regexp_parser/wiki/Token-Objects)
|
@@ -196,7 +196,7 @@ wiki page for more information on Token objects.
|
|
196
196
|
|
197
197
|
|
198
198
|
#### Example
|
199
|
-
The following example lexes the given pattern, checks it against the
|
199
|
+
The following example lexes the given pattern, checks it against the Ruby 1.9
|
200
200
|
syntax, and prints the token objects' text indented to their level.
|
201
201
|
|
202
202
|
```ruby
|
@@ -224,7 +224,7 @@ end
|
|
224
224
|
|
225
225
|
A one-liner that returns an array of the textual parts of the given pattern.
|
226
226
|
Compare the output with that of the one-liner example of the **Scanner**; notably
|
227
|
-
how the sequence 'cat' is treated. The 't' is
|
227
|
+
how the sequence 'cat' is treated. The 't' is separated because it's followed
|
228
228
|
by a quantifier that only applies to it.
|
229
229
|
|
230
230
|
```ruby
|
@@ -233,7 +233,7 @@ Regexp::Lexer.scan( /(cat?([b]at)){3,5}/ ).map {|token| token.text}
|
|
233
233
|
```
|
234
234
|
|
235
235
|
#### Notes
|
236
|
-
* The syntax argument is optional. It defaults to the version of the
|
236
|
+
* The syntax argument is optional. It defaults to the version of the Ruby
|
237
237
|
interpreter in use, as returned by RUBY_VERSION.
|
238
238
|
|
239
239
|
* The lexer normalizes some tokens, as noted in the Syntax section above.
|
@@ -308,8 +308,8 @@ Expression class. See the next section for details._
|
|
308
308
|
|
309
309
|
|
310
310
|
## Supported Syntax
|
311
|
-
The three modules support all the regular expression syntax features of Ruby 1.8
|
312
|
-
|
311
|
+
The three modules support all the regular expression syntax features of Ruby 1.8,
|
312
|
+
1.9, and 2.x:
|
313
313
|
|
314
314
|
_Note that not all of these are available in all versions of Ruby_
|
315
315
|
|
@@ -317,8 +317,8 @@ _Note that not all of these are available in all versions of Ruby_
|
|
317
317
|
| Syntax Feature | Examples | ⋯ |
|
318
318
|
| ------------------------------------- | ------------------------------------------------------- |:--------:|
|
319
319
|
| **Alternation** | `a\|b\|c` | ✓ |
|
320
|
-
| **Anchors** | `^`,
|
321
|
-
| **Character Classes** | `[abc]`, `[^\\]`, `[a-d&&
|
320
|
+
| **Anchors** | `\A`, `^`, `\b` | ✓ |
|
321
|
+
| **Character Classes** | `[abc]`, `[^\\]`, `[a-d&&aeiou]`, `[a=e=b]` | ✓ |
|
322
322
|
| **Character Types** | `\d`, `\H`, `\s` | ✓ |
|
323
323
|
| **Cluster Types** | `\R`, `\X` | ✓ |
|
324
324
|
| **Conditional Exps.** | `(?(cond)yes-subexp)`, `(?(cond)yes-subexp\|no-subexp)` | ✓ |
|
@@ -341,7 +341,7 @@ _Note that not all of these are available in all versions of Ruby_
|
|
341
341
|
|   _**Capturing**_ | `(abc)` | ✓ |
|
342
342
|
|   _**Comments**_ | `(?# comment text)` | ✓ |
|
343
343
|
|   _**Named**_ | `(?<name>abc)`, `(?'name'abc)` | ✓ |
|
344
|
-
|   _**Options**_ | `(?mi-x:abc)`, `(?a:\s\w+)`
|
344
|
+
|   _**Options**_ | `(?mi-x:abc)`, `(?a:\s\w+)`, `(?i)` | ✓ |
|
345
345
|
|   _**Passive**_ | `(?:abc)` | ✓ |
|
346
346
|
|   _**Subexp. Calls**_ | `\g<name>`, `\g<1>` | ✓ |
|
347
347
|
| **Keep** | `\K`, `(ab\Kc\|d\Ke)f` | ✓ |
|
@@ -357,14 +357,14 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
357
|
|   _**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
358
358
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
359
359
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
360
|
-
| **Unicode Properties** | _<sub>([Unicode
|
361
|
-
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`
|
362
|
-
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`
|
363
|
-
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`
|
364
|
-
|   _**Derived**_ | `\p{Math}`, `\P{Lowercase}`
|
365
|
-
|   _**General Categories**_ | `\p{Lu}`, `\P{Cs}`
|
366
|
-
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`
|
367
|
-
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`
|
360
|
+
| **Unicode Properties** | _<sub>([Unicode 10.0.0](http://www.unicode.org/versions/Unicode10.0.0/))</sub>_ | ⋱ |
|
361
|
+
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
362
|
+
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
363
|
+
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
364
|
+
|   _**Derived**_ | `\p{Math}`, `\P{Lowercase}`, `\p{^Cased}` | ✓ |
|
365
|
+
|   _**General Categories**_ | `\p{Lu}`, `\P{Cs}`, `\p{^sc}` | ✓ |
|
366
|
+
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
367
|
+
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
368
368
|
|
369
369
|
##### Inapplicable Features
|
370
370
|
|
@@ -389,9 +389,9 @@ or incorrectly return tokens/objects as literals._
|
|
389
389
|
## Testing
|
390
390
|
To run the tests simply run rake from the root directory, as 'test' is the default task.
|
391
391
|
|
392
|
-
It generates the scanner's code from the
|
392
|
+
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
393
393
|
|
394
|
-
The tests use
|
394
|
+
The tests use Ruby's test/unit. They can also be run with:
|
395
395
|
|
396
396
|
```
|
397
397
|
bin/test
|
@@ -409,16 +409,16 @@ It is sometimes helpful during development to focus on a specific test case, for
|
|
409
409
|
bin/test test/expression/test_base.rb -n test_expression_to_re
|
410
410
|
```
|
411
411
|
|
412
|
-
Note that changes to
|
412
|
+
Note that changes to Ragel files will not be reflected when using `bin/test`, so you might want to run:
|
413
413
|
|
414
414
|
```
|
415
415
|
rake ragel:rb && bin/test test/scanner/test_properties.rb
|
416
416
|
```
|
417
417
|
|
418
418
|
## Building
|
419
|
-
Building the scanner and the gem requires [
|
419
|
+
Building the scanner and the gem requires [Ragel](http://www.colm.net/open-source/ragel/) to be
|
420
420
|
installed. The build tasks will automatically invoke the 'ragel:rb' task to generate the
|
421
|
-
|
421
|
+
Ruby scanner code.
|
422
422
|
|
423
423
|
|
424
424
|
The project uses the standard rubygems package tasks, so:
|
@@ -72,16 +72,16 @@ module Regexp::Expression
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def greedy?
|
75
|
-
quantified? and quantifier.
|
75
|
+
quantified? and quantifier.greedy?
|
76
76
|
end
|
77
77
|
|
78
78
|
def reluctant?
|
79
|
-
quantified? and quantifier.
|
79
|
+
quantified? and quantifier.reluctant?
|
80
80
|
end
|
81
81
|
alias :lazy? :reluctant?
|
82
82
|
|
83
83
|
def possessive?
|
84
|
-
quantified? and quantifier.
|
84
|
+
quantified? and quantifier.possessive?
|
85
85
|
end
|
86
86
|
|
87
87
|
def multiline?
|
@@ -127,7 +127,7 @@ module Regexp::Expression
|
|
127
127
|
end
|
128
128
|
alias :=~ :match
|
129
129
|
|
130
|
-
def
|
130
|
+
def attributes
|
131
131
|
{
|
132
132
|
type: type,
|
133
133
|
token: token,
|
@@ -141,6 +141,7 @@ module Regexp::Expression
|
|
141
141
|
quantifier: quantified? ? quantifier.to_h : nil,
|
142
142
|
}
|
143
143
|
end
|
144
|
+
alias :to_h :attributes
|
144
145
|
end
|
145
146
|
|
146
147
|
def self.parsed(exp)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module Conditional
|
4
3
|
class TooManyBranches < StandardError
|
5
4
|
def initialize
|
@@ -7,43 +6,48 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
end
|
9
8
|
|
10
|
-
class Condition < Regexp::Expression::Base
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def initialize(token, options = {})
|
17
|
-
super
|
18
|
-
@branches = []
|
9
|
+
class Condition < Regexp::Expression::Base
|
10
|
+
# Name or number of the referenced capturing group that determines state.
|
11
|
+
# Returns a String if reference is by name, Integer if by number.
|
12
|
+
def reference
|
13
|
+
ref = text.tr("'<>()", "")
|
14
|
+
ref =~ /\D/ ? ref : Integer(ref)
|
19
15
|
end
|
16
|
+
end
|
20
17
|
|
21
|
-
|
22
|
-
@condition = exp
|
23
|
-
expressions << exp
|
24
|
-
end
|
18
|
+
class Branch < Regexp::Expression::Sequence; end
|
25
19
|
|
20
|
+
class Expression < Regexp::Expression::Subexpression
|
26
21
|
def <<(exp)
|
27
22
|
expressions.last << exp
|
28
23
|
end
|
29
24
|
|
30
|
-
def
|
25
|
+
def add_sequence
|
31
26
|
raise TooManyBranches.new if branches.length == 2
|
27
|
+
Branch.add_to(self, { conditional_level: conditional_level + 1 })
|
28
|
+
end
|
29
|
+
alias :branch :add_sequence
|
32
30
|
|
33
|
-
|
31
|
+
def condition=(exp)
|
32
|
+
expressions.delete(condition)
|
33
|
+
expressions.unshift(exp)
|
34
|
+
end
|
34
35
|
|
35
|
-
|
36
|
-
|
36
|
+
def condition
|
37
|
+
find { |subexp| subexp.is_a?(Condition) }
|
37
38
|
end
|
38
39
|
|
39
|
-
def
|
40
|
-
|
40
|
+
def branches
|
41
|
+
select { |subexp| subexp.is_a?(Sequence) }
|
41
42
|
end
|
42
43
|
|
43
|
-
def
|
44
|
-
|
44
|
+
def reference
|
45
|
+
condition.reference
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_s(format = :full)
|
49
|
+
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
45
50
|
end
|
46
51
|
end
|
47
52
|
end
|
48
|
-
|
49
53
|
end
|
@@ -12,8 +12,10 @@ module Regexp::Expression
|
|
12
12
|
|
13
13
|
class Atomic < Group::Base; end
|
14
14
|
class Passive < Group::Base; end
|
15
|
-
class Options < Group::Base; end
|
16
15
|
class Absence < Group::Base; end
|
16
|
+
class Options < Group::Base
|
17
|
+
attr_accessor :option_changes
|
18
|
+
end
|
17
19
|
|
18
20
|
class Capture < Group::Base
|
19
21
|
attr_accessor :number, :number_at_level
|
@@ -1,12 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
|
3
3
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
|
4
|
+
# TODO: this override is here for backwards compatibility, remove in 2.0.0
|
5
|
+
def initialize(*args)
|
6
|
+
unless args.first.is_a?(Regexp::Token)
|
7
|
+
warn('WARNING: Root.new without a Token argument is deprecated and '\
|
8
|
+
'will be removed in 2.0.0. Use Root.build for the old behavior.')
|
9
|
+
return super(self.class.build_token, *args)
|
10
|
+
end
|
11
|
+
super
|
6
12
|
end
|
7
13
|
|
8
|
-
|
9
|
-
|
10
|
-
|
14
|
+
class << self
|
15
|
+
def build(options = {})
|
16
|
+
new(build_token, options)
|
17
|
+
end
|
11
18
|
|
19
|
+
def build_token
|
20
|
+
Regexp::Token.new(:expression, :root, '', 0)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
12
24
|
end
|