regexp_parser 1.0.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +60 -30
- data/README.md +29 -29
- data/lib/regexp_parser/expression.rb +5 -4
- data/lib/regexp_parser/expression/classes/conditional.rb +27 -23
- data/lib/regexp_parser/expression/classes/group.rb +3 -1
- data/lib/regexp_parser/expression/classes/root.rb +17 -5
- data/lib/regexp_parser/expression/methods/traverse.rb +0 -1
- data/lib/regexp_parser/expression/quantifier.rb +11 -2
- data/lib/regexp_parser/expression/sequence.rb +37 -13
- data/lib/regexp_parser/expression/sequence_operation.rb +1 -6
- data/lib/regexp_parser/expression/subexpression.rb +15 -4
- data/lib/regexp_parser/parser.rb +19 -10
- data/lib/regexp_parser/scanner.rb +8 -8
- data/lib/regexp_parser/scanner/scanner.rl +8 -8
- data/lib/regexp_parser/version.rb +1 -1
- data/test/expression/test_subexpression.rb +9 -0
- data/test/lexer/test_refcalls.rb +3 -0
- data/test/parser/test_all.rb +3 -3
- data/test/parser/test_conditionals.rb +47 -10
- data/test/parser/test_groups.rb +22 -0
- data/test/parser/test_quantifiers.rb +43 -1
- data/test/parser/test_refcalls.rb +36 -20
- data/test/scanner/test_all.rb +4 -4
- data/test/scanner/test_refcalls.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20ba21704667276107a1041b3bb5943bbbec0078f706cf0d7db85110631dfe8d
|
4
|
+
data.tar.gz: 87886f6cad480ebc62f3e1f243d9b61170097e5419fc8b3972cd3348e5d8d7e0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0678640973741b2ea63053c058809fa075b3b465756bddee9a1914f67f7181a3681d3592662d4eadf5a60e844c550950b371577239924c4d3ce7f07f9fdfefa6'
|
7
|
+
data.tar.gz: 3bf18d0d7989c1f9eef010d1579ac78537c6c083c9b7c7c2f0cda094c0f973e1fdcc17c5992ae35d823720d2cdb10a60424876e08bd4b2b60b125c8b107a62bf
|
data/CHANGELOG.md
CHANGED
@@ -1,54 +1,84 @@
|
|
1
|
-
## [
|
1
|
+
## [1.2.0] - 2018-09-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
- `Subexpression` (branch node) includes `Enumerable`, allowing callers to `#select` children, etc.
|
6
|
+
|
7
|
+
### Fixed
|
8
|
+
|
9
|
+
- Fixed missing quantifier in `Conditional::Expression` methods `#to_s`, `#to_re`
|
10
|
+
- `Conditional::Condition` no longer lives outside the recursive `#expressions` tree
|
11
|
+
- it used to be the only expression stored in a custom ivar, complicating traversal
|
12
|
+
- its setter and getter (`#condition=`, `#condition`) still work as before
|
13
|
+
|
14
|
+
## [1.1.0] - 2018-09-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
15
|
+
|
16
|
+
### Added
|
17
|
+
|
18
|
+
- Added `Quantifier` methods `#greedy?`, `#possessive?`, `#reluctant?`/`#lazy?`
|
19
|
+
- Added `Group::Options#option_changes`
|
20
|
+
- shows the options enabled or disabled by the given options group
|
21
|
+
- as with all other expressions, `#options` shows the overall active options
|
22
|
+
- Added `Conditional#reference` and `Condition#reference`, indicating the determinative group
|
23
|
+
- Added `Subexpression#dig`, acts like [`Array#dig`](http://ruby-doc.org/core-2.5.0/Array.html#method-i-dig)
|
24
|
+
|
25
|
+
### Fixed
|
26
|
+
|
27
|
+
- Fixed parsing of quantified conditional expressions (quantifiers were assigned to the wrong expression)
|
28
|
+
- Fixed scanning and parsing of forward-referring subexpression calls (e.g. `\g<+1>`)
|
29
|
+
- `Root` and `Sequence` expressions now support the same constructor signature as all other expressions
|
30
|
+
|
31
|
+
## [1.0.0] - 2018-09-01 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
32
|
|
3
33
|
This release includes several breaking changes, mostly to character sets, #map and properties.
|
4
34
|
|
5
35
|
### Changed
|
6
36
|
|
7
37
|
- Changed handling of sets (a.k.a. character classes or "bracket expressions")
|
8
|
-
* see PR #55 / issue #47 for details
|
38
|
+
* see PR [#55](https://github.com/ammar/regexp_parser/pull/55) / issue [#47](https://github.com/ammar/regexp_parser/issues/47) for details
|
9
39
|
* sets are now parsed to expression trees like other nestable expressions
|
10
|
-
*
|
11
|
-
* CharacterSet#members has been removed
|
12
|
-
* new Range and Intersection classes represent corresponding syntax features
|
13
|
-
* a new PosixClass expression class represents e.g. [[:ascii:]]
|
14
|
-
* PosixClass instances behave like Property ones, e.g. support
|
15
|
-
*
|
16
|
-
- Changed Subexpression#map to act like regular Enumerable#map
|
17
|
-
* the old behavior is available as Subexpression#flat_map
|
18
|
-
* e.g. parse(/[a]/).map(&:to_s) == ["[a]"]
|
19
|
-
- Changed
|
20
|
-
* EscapeSequence::Codepoint
|
21
|
-
* they already existed, but were all parsed as EscapeSequence::Literal
|
22
|
-
* e.g.
|
23
|
-
- Changed naming of many property tokens (emitted for
|
24
|
-
* if you work with these tokens, see PR #56 for details
|
25
|
-
* e.g.
|
26
|
-
- Changed (?m) and the likes to emit as
|
27
|
-
* allows differentiating from group-local
|
28
|
-
- Changed name of Backreference::..NestLevel to
|
29
|
-
- Changed
|
40
|
+
* `#scan` now emits the same tokens as outside sets (no longer `:set, :member`)
|
41
|
+
* `CharacterSet#members` has been removed
|
42
|
+
* new `Range` and `Intersection` classes represent corresponding syntax features
|
43
|
+
* a new `PosixClass` expression class represents e.g. `[[:ascii:]]`
|
44
|
+
* `PosixClass` instances behave like `Property` ones, e.g. support `#negative?`
|
45
|
+
* `#scan` emits `:(non)posixclass, :<type>` instead of `:set, :char_(non)<type>`
|
46
|
+
- Changed `Subexpression#map` to act like regular `Enumerable#map`
|
47
|
+
* the old behavior is available as `Subexpression#flat_map`
|
48
|
+
* e.g. `parse(/[a]/).map(&:to_s) == ["[a]"]`; used to be `["[a]", "a"]`
|
49
|
+
- Changed expression emissions for some escape sequences
|
50
|
+
* `EscapeSequence::Codepoint`, `CodepointList`, `Hex` and `Octal` are now all used
|
51
|
+
* they already existed, but were all parsed as `EscapeSequence::Literal`
|
52
|
+
* e.g. `\x97` is now `EscapeSequence::Hex` instead of `EscapeSequence::Literal`
|
53
|
+
- Changed naming of many property tokens (emitted for `\p{...}`)
|
54
|
+
* if you work with these tokens, see PR [#56](https://github.com/ammar/regexp_parser/pull/56) for details
|
55
|
+
* e.g. `:punct_dash` is now `:dash_punctuation`
|
56
|
+
- Changed `(?m)` and the like to emit an `:options_switch` token (@4ade4d1)
|
57
|
+
* allows differentiating from group-local `:options`, e.g. `(?m:.)`
|
58
|
+
- Changed name of `Backreference::..NestLevel` to `..RecursionLevel` (@4184339)
|
59
|
+
- Changed `Backreference::Number#number` from `String` to `Integer` (@40a2231)
|
30
60
|
|
31
61
|
### Added
|
32
62
|
|
33
63
|
- Added support for all previously missing properties (about 250)
|
34
|
-
- Added Expression::UnicodeProperty#shortcut (e.g. returns "m" for
|
35
|
-
- Added
|
36
|
-
- Added
|
37
|
-
- Added
|
64
|
+
- Added `Expression::UnicodeProperty#shortcut` (e.g. returns "m" for `\p{mark}`)
|
65
|
+
- Added `#char(s)` and `#codepoint(s)` methods to all `EscapeSequence` expressions
|
66
|
+
- Added `#number`/`#name`/`#recursion_level` to all backref/call expressions (@174bf21)
|
67
|
+
- Added `#number` and `#number_at_level` to capturing group expressions (@40a2231)
|
38
68
|
|
39
69
|
### Fixed
|
40
70
|
|
41
|
-
- Fixed
|
71
|
+
- Fixed Ruby version mapping of some properties
|
42
72
|
- Fixed scanning of some property spellings, e.g. with dashes
|
43
73
|
- Fixed some incorrect property alias normalizations
|
44
|
-
- Fixed scanning of codepoint escapes with 6 digits (e.g.
|
45
|
-
- Fixed scanning of
|
74
|
+
- Fixed scanning of codepoint escapes with 6 digits (e.g. `\u{10FFFF}`)
|
75
|
+
- Fixed scanning of `\R` and `\X` within sets; they act as literals there
|
46
76
|
|
47
77
|
## [0.5.0] - 2018-04-29 - [Janosch Müller](mailto:janosch84@gmail.com)
|
48
78
|
|
49
79
|
### Changed
|
50
80
|
|
51
|
-
- Changed handling of Ruby versions (PR #53)
|
81
|
+
- Changed handling of Ruby versions (PR [#53](https://github.com/ammar/regexp_parser/pull/53))
|
52
82
|
* New Ruby versions are now supported by default
|
53
83
|
* Some deep-lying APIs have changed, which should not affect most users:
|
54
84
|
* `Regexp::Syntax::VERSIONS` is gone
|
data/README.md
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
|
3
3
|
[](http://badge.fury.io/rb/regexp_parser) [](http://travis-ci.org/ammar/regexp_parser) [](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
4
|
|
5
|
-
A
|
5
|
+
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
6
|
|
7
7
|
* Multilayered
|
8
|
-
* A scanner/tokenizer based on [
|
8
|
+
* A scanner/tokenizer based on [Ragel](http://www.colm.net/open-source/ragel/)
|
9
9
|
* A lexer that produces a "stream" of token objects.
|
10
10
|
* A parser that produces a "tree" of Expression objects (OO API)
|
11
|
-
* Runs on
|
12
|
-
* Recognizes
|
11
|
+
* Runs on Ruby 1.9, 2.x, and JRuby (1.9 mode) runtimes.
|
12
|
+
* Recognizes Ruby 1.8, 1.9, and 2.x regular expressions [See Supported Syntax](#supported-syntax)
|
13
13
|
|
14
14
|
|
15
15
|
_For examples of regexp_parser in use, see [Example Projects](#example-projects)._
|
@@ -46,7 +46,7 @@ The three main modules are **Scanner**, **Lexer**, and **Parser**. Each of them
|
|
46
46
|
provides a single method that takes a regular expression (as a RegExp object or
|
47
47
|
a string) and returns its results. The **Lexer** and the **Parser** accept an
|
48
48
|
optional second argument that specifies the syntax version, like 'ruby/2.0',
|
49
|
-
which defaults to the host
|
49
|
+
which defaults to the host Ruby version (using RUBY_VERSION).
|
50
50
|
|
51
51
|
Here are the basic usage examples:
|
52
52
|
|
@@ -77,7 +77,7 @@ called with the results as follows:
|
|
77
77
|
## Components
|
78
78
|
|
79
79
|
### Scanner
|
80
|
-
A
|
80
|
+
A Ragel-generated scanner that recognizes the cumulative syntax of all
|
81
81
|
supported syntax versions. It breaks a given expression's text into the
|
82
82
|
smallest parts, and identifies their type, token, text, and start/end
|
83
83
|
offsets within the pattern.
|
@@ -123,7 +123,7 @@ Regexp::Scanner.scan( /(cat?([bhm]at)){3,5}/ ).map {|token| token[2]}
|
|
123
123
|
balancing punctuation and premature end of pattern. Flavor validity checks
|
124
124
|
are performed in the lexer, which uses a syntax object.
|
125
125
|
|
126
|
-
* If the input is a
|
126
|
+
* If the input is a Ruby **Regexp** object, the scanner calls #source on it to
|
127
127
|
get its string representation. #source does not include the options of
|
128
128
|
the expression (m, i, and x). To include the options in the scan, #to_s
|
129
129
|
should be called on the **Regexp** before passing it to the scanner or the
|
@@ -188,7 +188,7 @@ ruby_18.implements? :conditional, :condition # => false
|
|
188
188
|
Sits on top of the scanner and performs lexical analysis on the tokens that
|
189
189
|
it emits. Among its tasks are: breaking quantified literal runs, collecting the
|
190
190
|
emitted token attributes into Token objects, calculating their nesting depth,
|
191
|
-
normalizing tokens for the parser, and
|
191
|
+
normalizing tokens for the parser, and checking if the tokens are implemented by
|
192
192
|
the given syntax version.
|
193
193
|
|
194
194
|
See the [Token Objects](https://github.com/ammar/regexp_parser/wiki/Token-Objects)
|
@@ -196,7 +196,7 @@ wiki page for more information on Token objects.
|
|
196
196
|
|
197
197
|
|
198
198
|
#### Example
|
199
|
-
The following example lexes the given pattern, checks it against the
|
199
|
+
The following example lexes the given pattern, checks it against the Ruby 1.9
|
200
200
|
syntax, and prints the token objects' text indented to their level.
|
201
201
|
|
202
202
|
```ruby
|
@@ -224,7 +224,7 @@ end
|
|
224
224
|
|
225
225
|
A one-liner that returns an array of the textual parts of the given pattern.
|
226
226
|
Compare the output with that of the one-liner example of the **Scanner**; notably
|
227
|
-
how the sequence 'cat' is treated. The 't' is
|
227
|
+
how the sequence 'cat' is treated. The 't' is separated because it's followed
|
228
228
|
by a quantifier that only applies to it.
|
229
229
|
|
230
230
|
```ruby
|
@@ -233,7 +233,7 @@ Regexp::Lexer.scan( /(cat?([b]at)){3,5}/ ).map {|token| token.text}
|
|
233
233
|
```
|
234
234
|
|
235
235
|
#### Notes
|
236
|
-
* The syntax argument is optional. It defaults to the version of the
|
236
|
+
* The syntax argument is optional. It defaults to the version of the Ruby
|
237
237
|
interpreter in use, as returned by RUBY_VERSION.
|
238
238
|
|
239
239
|
* The lexer normalizes some tokens, as noted in the Syntax section above.
|
@@ -308,8 +308,8 @@ Expression class. See the next section for details._
|
|
308
308
|
|
309
309
|
|
310
310
|
## Supported Syntax
|
311
|
-
The three modules support all the regular expression syntax features of Ruby 1.8
|
312
|
-
|
311
|
+
The three modules support all the regular expression syntax features of Ruby 1.8,
|
312
|
+
1.9, and 2.x:
|
313
313
|
|
314
314
|
_Note that not all of these are available in all versions of Ruby_
|
315
315
|
|
@@ -317,8 +317,8 @@ _Note that not all of these are available in all versions of Ruby_
|
|
317
317
|
| Syntax Feature | Examples | ⋯ |
|
318
318
|
| ------------------------------------- | ------------------------------------------------------- |:--------:|
|
319
319
|
| **Alternation** | `a\|b\|c` | ✓ |
|
320
|
-
| **Anchors** | `^`,
|
321
|
-
| **Character Classes** | `[abc]`, `[^\\]`, `[a-d&&
|
320
|
+
| **Anchors** | `\A`, `^`, `\b` | ✓ |
|
321
|
+
| **Character Classes** | `[abc]`, `[^\\]`, `[a-d&&aeiou]`, `[a=e=b]` | ✓ |
|
322
322
|
| **Character Types** | `\d`, `\H`, `\s` | ✓ |
|
323
323
|
| **Cluster Types** | `\R`, `\X` | ✓ |
|
324
324
|
| **Conditional Exps.** | `(?(cond)yes-subexp)`, `(?(cond)yes-subexp\|no-subexp)` | ✓ |
|
@@ -341,7 +341,7 @@ _Note that not all of these are available in all versions of Ruby_
|
|
341
341
|
|   _**Capturing**_ | `(abc)` | ✓ |
|
342
342
|
|   _**Comments**_ | `(?# comment text)` | ✓ |
|
343
343
|
|   _**Named**_ | `(?<name>abc)`, `(?'name'abc)` | ✓ |
|
344
|
-
|   _**Options**_ | `(?mi-x:abc)`, `(?a:\s\w+)`
|
344
|
+
|   _**Options**_ | `(?mi-x:abc)`, `(?a:\s\w+)`, `(?i)` | ✓ |
|
345
345
|
|   _**Passive**_ | `(?:abc)` | ✓ |
|
346
346
|
|   _**Subexp. Calls**_ | `\g<name>`, `\g<1>` | ✓ |
|
347
347
|
| **Keep** | `\K`, `(ab\Kc\|d\Ke)f` | ✓ |
|
@@ -357,14 +357,14 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
357
|
|   _**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
358
358
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
359
359
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
360
|
-
| **Unicode Properties** | _<sub>([Unicode
|
361
|
-
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`
|
362
|
-
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`
|
363
|
-
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`
|
364
|
-
|   _**Derived**_ | `\p{Math}`, `\P{Lowercase}`
|
365
|
-
|   _**General Categories**_ | `\p{Lu}`, `\P{Cs}`
|
366
|
-
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`
|
367
|
-
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`
|
360
|
+
| **Unicode Properties** | _<sub>([Unicode 10.0.0](http://www.unicode.org/versions/Unicode10.0.0/))</sub>_ | ⋱ |
|
361
|
+
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
362
|
+
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
363
|
+
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
364
|
+
|   _**Derived**_ | `\p{Math}`, `\P{Lowercase}`, `\p{^Cased}` | ✓ |
|
365
|
+
|   _**General Categories**_ | `\p{Lu}`, `\P{Cs}`, `\p{^sc}` | ✓ |
|
366
|
+
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
367
|
+
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
368
368
|
|
369
369
|
##### Inapplicable Features
|
370
370
|
|
@@ -389,9 +389,9 @@ or incorrectly return tokens/objects as literals._
|
|
389
389
|
## Testing
|
390
390
|
To run the tests simply run rake from the root directory, as 'test' is the default task.
|
391
391
|
|
392
|
-
It generates the scanner's code from the
|
392
|
+
It generates the scanner's code from the Ragel source files and runs all the tests, so it requires Ragel to be installed.
|
393
393
|
|
394
|
-
The tests use
|
394
|
+
The tests use Ruby's test/unit. They can also be run with:
|
395
395
|
|
396
396
|
```
|
397
397
|
bin/test
|
@@ -409,16 +409,16 @@ It is sometimes helpful during development to focus on a specific test case, for
|
|
409
409
|
bin/test test/expression/test_base.rb -n test_expression_to_re
|
410
410
|
```
|
411
411
|
|
412
|
-
Note that changes to
|
412
|
+
Note that changes to Ragel files will not be reflected when using `bin/test`, so you might want to run:
|
413
413
|
|
414
414
|
```
|
415
415
|
rake ragel:rb && bin/test test/scanner/test_properties.rb
|
416
416
|
```
|
417
417
|
|
418
418
|
## Building
|
419
|
-
Building the scanner and the gem requires [
|
419
|
+
Building the scanner and the gem requires [Ragel](http://www.colm.net/open-source/ragel/) to be
|
420
420
|
installed. The build tasks will automatically invoke the 'ragel:rb' task to generate the
|
421
|
-
|
421
|
+
Ruby scanner code.
|
422
422
|
|
423
423
|
|
424
424
|
The project uses the standard rubygems package tasks, so:
|
@@ -72,16 +72,16 @@ module Regexp::Expression
|
|
72
72
|
end
|
73
73
|
|
74
74
|
def greedy?
|
75
|
-
quantified? and quantifier.
|
75
|
+
quantified? and quantifier.greedy?
|
76
76
|
end
|
77
77
|
|
78
78
|
def reluctant?
|
79
|
-
quantified? and quantifier.
|
79
|
+
quantified? and quantifier.reluctant?
|
80
80
|
end
|
81
81
|
alias :lazy? :reluctant?
|
82
82
|
|
83
83
|
def possessive?
|
84
|
-
quantified? and quantifier.
|
84
|
+
quantified? and quantifier.possessive?
|
85
85
|
end
|
86
86
|
|
87
87
|
def multiline?
|
@@ -127,7 +127,7 @@ module Regexp::Expression
|
|
127
127
|
end
|
128
128
|
alias :=~ :match
|
129
129
|
|
130
|
-
def
|
130
|
+
def attributes
|
131
131
|
{
|
132
132
|
type: type,
|
133
133
|
token: token,
|
@@ -141,6 +141,7 @@ module Regexp::Expression
|
|
141
141
|
quantifier: quantified? ? quantifier.to_h : nil,
|
142
142
|
}
|
143
143
|
end
|
144
|
+
alias :to_h :attributes
|
144
145
|
end
|
145
146
|
|
146
147
|
def self.parsed(exp)
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module Conditional
|
4
3
|
class TooManyBranches < StandardError
|
5
4
|
def initialize
|
@@ -7,43 +6,48 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
end
|
9
8
|
|
10
|
-
class Condition < Regexp::Expression::Base
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def initialize(token, options = {})
|
17
|
-
super
|
18
|
-
@branches = []
|
9
|
+
class Condition < Regexp::Expression::Base
|
10
|
+
# Name or number of the referenced capturing group that determines state.
|
11
|
+
# Returns a String if reference is by name, Integer if by number.
|
12
|
+
def reference
|
13
|
+
ref = text.tr("'<>()", "")
|
14
|
+
ref =~ /\D/ ? ref : Integer(ref)
|
19
15
|
end
|
16
|
+
end
|
20
17
|
|
21
|
-
|
22
|
-
@condition = exp
|
23
|
-
expressions << exp
|
24
|
-
end
|
18
|
+
class Branch < Regexp::Expression::Sequence; end
|
25
19
|
|
20
|
+
class Expression < Regexp::Expression::Subexpression
|
26
21
|
def <<(exp)
|
27
22
|
expressions.last << exp
|
28
23
|
end
|
29
24
|
|
30
|
-
def
|
25
|
+
def add_sequence
|
31
26
|
raise TooManyBranches.new if branches.length == 2
|
27
|
+
Branch.add_to(self, { conditional_level: conditional_level + 1 })
|
28
|
+
end
|
29
|
+
alias :branch :add_sequence
|
32
30
|
|
33
|
-
|
31
|
+
def condition=(exp)
|
32
|
+
expressions.delete(condition)
|
33
|
+
expressions.unshift(exp)
|
34
|
+
end
|
34
35
|
|
35
|
-
|
36
|
-
|
36
|
+
def condition
|
37
|
+
find { |subexp| subexp.is_a?(Condition) }
|
37
38
|
end
|
38
39
|
|
39
|
-
def
|
40
|
-
|
40
|
+
def branches
|
41
|
+
select { |subexp| subexp.is_a?(Sequence) }
|
41
42
|
end
|
42
43
|
|
43
|
-
def
|
44
|
-
|
44
|
+
def reference
|
45
|
+
condition.reference
|
46
|
+
end
|
47
|
+
|
48
|
+
def to_s(format = :full)
|
49
|
+
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
45
50
|
end
|
46
51
|
end
|
47
52
|
end
|
48
|
-
|
49
53
|
end
|
@@ -12,8 +12,10 @@ module Regexp::Expression
|
|
12
12
|
|
13
13
|
class Atomic < Group::Base; end
|
14
14
|
class Passive < Group::Base; end
|
15
|
-
class Options < Group::Base; end
|
16
15
|
class Absence < Group::Base; end
|
16
|
+
class Options < Group::Base
|
17
|
+
attr_accessor :option_changes
|
18
|
+
end
|
17
19
|
|
18
20
|
class Capture < Group::Base
|
19
21
|
attr_accessor :number, :number_at_level
|
@@ -1,12 +1,24 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
|
3
3
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
|
4
|
+
# TODO: this override is here for backwards compatibility, remove in 2.0.0
|
5
|
+
def initialize(*args)
|
6
|
+
unless args.first.is_a?(Regexp::Token)
|
7
|
+
warn('WARNING: Root.new without a Token argument is deprecated and '\
|
8
|
+
'will be removed in 2.0.0. Use Root.build for the old behavior.')
|
9
|
+
return super(self.class.build_token, *args)
|
10
|
+
end
|
11
|
+
super
|
6
12
|
end
|
7
13
|
|
8
|
-
|
9
|
-
|
10
|
-
|
14
|
+
class << self
|
15
|
+
def build(options = {})
|
16
|
+
new(build_token, options)
|
17
|
+
end
|
11
18
|
|
19
|
+
def build_token
|
20
|
+
Regexp::Token.new(:expression, :root, '', 0)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
12
24
|
end
|