regexp_parser 2.0.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +66 -0
- data/Gemfile +6 -1
- data/README.md +1 -4
- data/Rakefile +8 -8
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression.rb +3 -2
- data/lib/regexp_parser/expression/classes/backref.rb +5 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
- data/lib/regexp_parser/expression/classes/group.rb +12 -2
- data/lib/regexp_parser/expression/classes/property.rb +1 -1
- data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +3 -9
- data/lib/regexp_parser/expression/subexpression.rb +1 -1
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner.rb +1084 -1230
- data/lib/regexp_parser/scanner/scanner.rl +80 -110
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/syntax/any.rb +3 -3
- data/lib/regexp_parser/syntax/base.rb +1 -1
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/expression/subexpression_spec.rb +1 -1
- data/spec/expression/to_s_spec.rb +28 -36
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/errors_spec.rb +1 -1
- data/spec/parser/quantifiers_spec.rb +1 -0
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/scanner/escapes_spec.rb +2 -1
- data/spec/scanner/groups_spec.rb +10 -1
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +57 -14
- data/spec/spec_helper.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
|
|
4
|
+
data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
|
|
7
|
+
data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,71 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
|
|
7
|
+
- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
|
|
8
|
+
* thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
|
|
9
|
+
|
|
10
|
+
## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- common ancestor for all scanning/parsing/lexing errors
|
|
15
|
+
* `Regexp::Parser::Error` can now be rescued as a catch-all
|
|
16
|
+
* the following errors (and their many descendants) now inherit from it:
|
|
17
|
+
- `Regexp::Expression::Conditional::TooManyBranches`
|
|
18
|
+
- `Regexp::Parser::ParserError`
|
|
19
|
+
- `Regexp::Scanner::ScannerError`
|
|
20
|
+
- `Regexp::Scanner::ValidationError`
|
|
21
|
+
- `Regexp::Syntax::SyntaxError`
|
|
22
|
+
* it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
|
|
23
|
+
* thanks to [sandstrom](https://github.com/sandstrom) for the cue
|
|
24
|
+
|
|
25
|
+
### Fixed
|
|
26
|
+
|
|
27
|
+
- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
|
|
28
|
+
* a regression in v2.0.1 had caused them to be scanned as literals
|
|
29
|
+
- fixed scanning of some backreference and subexpression call edge cases
|
|
30
|
+
* e.g. `\k<+1>`, `\g<x-1>`
|
|
31
|
+
- fixed tokenization of some escapes in character sets
|
|
32
|
+
* `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
|
|
33
|
+
* all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
|
|
34
|
+
* if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
|
|
35
|
+
* the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
|
|
36
|
+
- fixed handling of control/metacontrol escapes in character sets
|
|
37
|
+
* e.g. `[\cX]`, `[\M-\C-X]`
|
|
38
|
+
* they were misread as a bunch of individual literals, escapes, and ranges
|
|
39
|
+
- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
|
|
40
|
+
|
|
41
|
+
## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
42
|
+
|
|
43
|
+
### Fixed
|
|
44
|
+
|
|
45
|
+
- fixed error when scanning some unlikely and redundant but valid charset patterns
|
|
46
|
+
* e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
|
|
47
|
+
- fixed ancestry of some error classes related to syntax version lookup
|
|
48
|
+
* `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
|
|
49
|
+
* they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
|
|
50
|
+
|
|
51
|
+
## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
52
|
+
|
|
53
|
+
### Fixed
|
|
54
|
+
|
|
55
|
+
- fixed `FrozenError` when calling `#to_s` on a frozen `Group::Passive`
|
|
56
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon)
|
|
57
|
+
|
|
58
|
+
## [2.0.1] - 2020-12-20 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
59
|
+
|
|
60
|
+
### Fixed
|
|
61
|
+
|
|
62
|
+
- fixed error when scanning some group names
|
|
63
|
+
* this affected names containing hyphens, digits or multibyte chars, e.g. `/(?<a1>a)/`
|
|
64
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
|
65
|
+
- fixed error when scanning hex escapes with just one hex digit
|
|
66
|
+
* e.g. `/\x0A/` was scanned correctly, but the equivalent `/\xA/` was not
|
|
67
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
|
68
|
+
|
|
3
69
|
## [2.0.0] - 2020-11-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
|
4
70
|
|
|
5
71
|
### Changed
|
data/Gemfile
CHANGED
|
@@ -3,7 +3,12 @@ source 'https://rubygems.org'
|
|
|
3
3
|
gemspec
|
|
4
4
|
|
|
5
5
|
group :development, :test do
|
|
6
|
+
gem 'ice_nine', '~> 0.11.2'
|
|
6
7
|
gem 'rake', '~> 13.0'
|
|
7
8
|
gem 'regexp_property_values', '~> 1.0'
|
|
8
|
-
gem 'rspec', '~> 3.
|
|
9
|
+
gem 'rspec', '~> 3.10'
|
|
10
|
+
if RUBY_VERSION.to_f >= 2.7
|
|
11
|
+
gem 'gouteur'
|
|
12
|
+
gem 'rubocop', '~> 1.7'
|
|
13
|
+
end
|
|
9
14
|
end
|
data/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Regexp::Parser
|
|
2
2
|
|
|
3
|
-
[](http://badge.fury.io/rb/regexp_parser) [](http://badge.fury.io/rb/regexp_parser) [](https://github.com/ammar/regexp_parser/actions) [](https://github.com/ammar/regexp_parser/actions) [](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
|
4
4
|
|
|
5
5
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
|
6
6
|
|
|
@@ -22,9 +22,6 @@ _For examples of regexp_parser in use, see [Example Projects](#example-projects)
|
|
|
22
22
|
* Ragel >= 6.0, but only if you want to build the gem or work on the scanner.
|
|
23
23
|
|
|
24
24
|
|
|
25
|
-
_Note: See the .travis.yml file for covered versions._
|
|
26
|
-
|
|
27
|
-
|
|
28
25
|
---
|
|
29
26
|
## Install
|
|
30
27
|
|
data/Rakefile
CHANGED
|
@@ -7,8 +7,8 @@ require 'bundler'
|
|
|
7
7
|
require 'rubygems/package_task'
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
RAGEL_SOURCE_DIR = File.
|
|
11
|
-
RAGEL_OUTPUT_DIR = File.
|
|
10
|
+
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
|
11
|
+
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
|
12
12
|
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
|
13
13
|
|
|
14
14
|
|
|
@@ -25,11 +25,11 @@ end
|
|
|
25
25
|
|
|
26
26
|
namespace :ragel do
|
|
27
27
|
desc "Process the ragel source files and output ruby code"
|
|
28
|
-
task :rb do
|
|
29
|
-
RAGEL_SOURCE_FILES.each do |
|
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{
|
|
28
|
+
task :rb do
|
|
29
|
+
RAGEL_SOURCE_FILES.each do |source_file|
|
|
30
|
+
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
|
31
31
|
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{
|
|
32
|
+
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
|
33
33
|
|
|
34
34
|
contents = File.read(output_file)
|
|
35
35
|
|
|
@@ -42,7 +42,7 @@ namespace :ragel do
|
|
|
42
42
|
end
|
|
43
43
|
|
|
44
44
|
desc "Delete the ragel generated source file(s)"
|
|
45
|
-
task :clean do
|
|
45
|
+
task :clean do
|
|
46
46
|
RAGEL_SOURCE_FILES.each do |file|
|
|
47
47
|
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
|
48
48
|
end
|
|
@@ -61,7 +61,7 @@ namespace :props do
|
|
|
61
61
|
task :update do
|
|
62
62
|
require 'regexp_property_values'
|
|
63
63
|
RegexpPropertyValues.update
|
|
64
|
-
dir = File.
|
|
64
|
+
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
|
65
65
|
|
|
66
66
|
require 'psych'
|
|
67
67
|
write_hash_to_file = ->(hash, path) do
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
require 'regexp_parser/error'
|
|
2
2
|
|
|
3
|
+
module Regexp::Expression
|
|
3
4
|
class Base
|
|
4
5
|
attr_accessor :type, :token
|
|
5
6
|
attr_accessor :text, :ts
|
|
@@ -21,7 +22,7 @@ module Regexp::Expression
|
|
|
21
22
|
self.options = options
|
|
22
23
|
end
|
|
23
24
|
|
|
24
|
-
def
|
|
25
|
+
def initialize_copy(orig)
|
|
25
26
|
self.text = (orig.text ? orig.text.dup : nil)
|
|
26
27
|
self.options = (orig.options ? orig.options.dup : nil)
|
|
27
28
|
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
|
@@ -2,6 +2,11 @@ module Regexp::Expression
|
|
|
2
2
|
module Backreference
|
|
3
3
|
class Base < Regexp::Expression::Base
|
|
4
4
|
attr_accessor :referenced_expression
|
|
5
|
+
|
|
6
|
+
def initialize_copy(orig)
|
|
7
|
+
self.referenced_expression = orig.referenced_expression.dup
|
|
8
|
+
super
|
|
9
|
+
end
|
|
5
10
|
end
|
|
6
11
|
|
|
7
12
|
class Number < Backreference::Base
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
module Regexp::Expression
|
|
2
2
|
module Conditional
|
|
3
|
-
class TooManyBranches <
|
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
|
4
4
|
def initialize
|
|
5
5
|
super('The conditional expression has more than 2 branches')
|
|
6
6
|
end
|
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
|
15
15
|
ref = text.tr("'<>()", "")
|
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
|
17
17
|
end
|
|
18
|
+
|
|
19
|
+
def initialize_copy(orig)
|
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
|
21
|
+
super
|
|
22
|
+
end
|
|
18
23
|
end
|
|
19
24
|
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
|
@@ -53,6 +58,11 @@ module Regexp::Expression
|
|
|
53
58
|
def to_s(format = :full)
|
|
54
59
|
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
|
55
60
|
end
|
|
61
|
+
|
|
62
|
+
def initialize_copy(orig)
|
|
63
|
+
self.referenced_expression = orig.referenced_expression.dup
|
|
64
|
+
super
|
|
65
|
+
end
|
|
56
66
|
end
|
|
57
67
|
end
|
|
58
68
|
end
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
module Regexp::Expression
|
|
2
2
|
|
|
3
3
|
class FreeSpace < Regexp::Expression::Base
|
|
4
|
-
def quantify(
|
|
5
|
-
raise
|
|
4
|
+
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
|
5
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
|
6
6
|
end
|
|
7
7
|
end
|
|
8
8
|
|
|
@@ -13,6 +13,11 @@ module Regexp::Expression
|
|
|
13
13
|
class Passive < Group::Base
|
|
14
14
|
attr_writer :implicit
|
|
15
15
|
|
|
16
|
+
def initialize(*)
|
|
17
|
+
@implicit = false
|
|
18
|
+
super
|
|
19
|
+
end
|
|
20
|
+
|
|
16
21
|
def to_s(format = :full)
|
|
17
22
|
if implicit?
|
|
18
23
|
"#{expressions.join}#{quantifier_affix(format)}"
|
|
@@ -22,7 +27,7 @@ module Regexp::Expression
|
|
|
22
27
|
end
|
|
23
28
|
|
|
24
29
|
def implicit?
|
|
25
|
-
@implicit
|
|
30
|
+
@implicit
|
|
26
31
|
end
|
|
27
32
|
end
|
|
28
33
|
|
|
@@ -30,6 +35,11 @@ module Regexp::Expression
|
|
|
30
35
|
class Atomic < Group::Base; end
|
|
31
36
|
class Options < Group::Base
|
|
32
37
|
attr_accessor :option_changes
|
|
38
|
+
|
|
39
|
+
def initialize_copy(orig)
|
|
40
|
+
self.option_changes = orig.option_changes.dup
|
|
41
|
+
super
|
|
42
|
+
end
|
|
33
43
|
end
|
|
34
44
|
|
|
35
45
|
class Capture < Group::Base
|
|
@@ -48,7 +58,7 @@ module Regexp::Expression
|
|
|
48
58
|
super
|
|
49
59
|
end
|
|
50
60
|
|
|
51
|
-
def
|
|
61
|
+
def initialize_copy(orig)
|
|
52
62
|
@name = orig.name.dup
|
|
53
63
|
super
|
|
54
64
|
end
|
|
@@ -10,7 +10,7 @@ class Regexp::MatchLength
|
|
|
10
10
|
self.exp_class = exp.class
|
|
11
11
|
self.min_rep = exp.repetitions.min
|
|
12
12
|
self.max_rep = exp.repetitions.max
|
|
13
|
-
if base = opts[:base]
|
|
13
|
+
if (base = opts[:base])
|
|
14
14
|
self.base_min = base
|
|
15
15
|
self.base_max = base
|
|
16
16
|
self.reify = ->{ '.' * base }
|
|
@@ -32,7 +32,7 @@ class Regexp::MatchLength
|
|
|
32
32
|
end
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
-
def endless_each
|
|
35
|
+
def endless_each
|
|
36
36
|
return enum_for(__method__) unless block_given?
|
|
37
37
|
(min..max).each { |num| yield(num) if include?(num) }
|
|
38
38
|
end
|
|
@@ -36,7 +36,7 @@ module Regexp::Expression
|
|
|
36
36
|
|
|
37
37
|
# Iterates over the expressions of this expression as an array, passing
|
|
38
38
|
# the expression and its index within its parent to the given block.
|
|
39
|
-
def each_expression(include_self = false
|
|
39
|
+
def each_expression(include_self = false)
|
|
40
40
|
return enum_for(__method__, include_self) unless block_given?
|
|
41
41
|
|
|
42
42
|
traverse(include_self) do |event, exp, index|
|
|
@@ -47,7 +47,7 @@ module Regexp::Expression
|
|
|
47
47
|
# Returns a new array with the results of calling the given block once
|
|
48
48
|
# for every expression. If a block is not given, returns an array with
|
|
49
49
|
# each expression and its level index as an array.
|
|
50
|
-
def flat_map(include_self = false
|
|
50
|
+
def flat_map(include_self = false)
|
|
51
51
|
result = []
|
|
52
52
|
|
|
53
53
|
each_expression(include_self) do |exp, index|
|
|
@@ -41,17 +41,11 @@ module Regexp::Expression
|
|
|
41
41
|
alias :ts :starts_at
|
|
42
42
|
|
|
43
43
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
|
44
|
-
|
|
45
|
-
target
|
|
46
|
-
|
|
47
|
-
target = expressions[offset -= 1]
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
|
51
|
-
'quantifier')
|
|
44
|
+
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
|
45
|
+
target or raise Regexp::Parser::Error,
|
|
46
|
+
"No valid target found for '#{text}' quantifier"
|
|
52
47
|
|
|
53
48
|
target.quantify(token, text, min, max, mode)
|
|
54
49
|
end
|
|
55
50
|
end
|
|
56
|
-
|
|
57
51
|
end
|
data/lib/regexp_parser/parser.rb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
+
require 'regexp_parser/error'
|
|
1
2
|
require 'regexp_parser/expression'
|
|
2
3
|
|
|
3
4
|
class Regexp::Parser
|
|
4
5
|
include Regexp::Expression
|
|
5
|
-
include Regexp::Syntax
|
|
6
6
|
|
|
7
|
-
class ParserError <
|
|
7
|
+
class ParserError < Regexp::Parser::Error; end
|
|
8
8
|
|
|
9
9
|
class UnknownTokenTypeError < ParserError
|
|
10
10
|
def initialize(type, token)
|
|
@@ -70,95 +70,155 @@ class Regexp::Parser
|
|
|
70
70
|
enabled_options
|
|
71
71
|
end
|
|
72
72
|
|
|
73
|
-
def
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
73
|
+
def parse_token(token)
|
|
74
|
+
case token.type
|
|
75
|
+
when :anchor; anchor(token)
|
|
76
|
+
when :assertion, :group; group(token)
|
|
77
|
+
when :backref; backref(token)
|
|
78
|
+
when :conditional; conditional(token)
|
|
79
|
+
when :escape; escape(token)
|
|
80
|
+
when :free_space; free_space(token)
|
|
81
|
+
when :keep; keep(token)
|
|
82
|
+
when :literal; literal(token)
|
|
83
|
+
when :meta; meta(token)
|
|
84
|
+
when :posixclass, :nonposixclass; posixclass(token)
|
|
85
|
+
when :property, :nonproperty; property(token)
|
|
86
|
+
when :quantifier; quantifier(token)
|
|
87
|
+
when :set; set(token)
|
|
88
|
+
when :type; type(token)
|
|
89
|
+
else
|
|
90
|
+
raise UnknownTokenTypeError.new(token.type, token)
|
|
91
|
+
end
|
|
79
92
|
|
|
80
|
-
|
|
81
|
-
def update_transplanted_subtree(exp, new_parent)
|
|
82
|
-
exp.nesting_level = new_parent.nesting_level + 1
|
|
83
|
-
exp.respond_to?(:each) &&
|
|
84
|
-
exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
|
|
93
|
+
close_completed_character_set_range
|
|
85
94
|
end
|
|
86
95
|
|
|
87
|
-
def
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
96
|
+
def anchor(token)
|
|
97
|
+
case token.token
|
|
98
|
+
when :bol; node << Anchor::BeginningOfLine.new(token, active_opts)
|
|
99
|
+
when :bos; node << Anchor::BOS.new(token, active_opts)
|
|
100
|
+
when :eol; node << Anchor::EndOfLine.new(token, active_opts)
|
|
101
|
+
when :eos; node << Anchor::EOS.new(token, active_opts)
|
|
102
|
+
when :eos_ob_eol; node << Anchor::EOSobEOL.new(token, active_opts)
|
|
103
|
+
when :match_start; node << Anchor::MatchStart.new(token, active_opts)
|
|
104
|
+
when :nonword_boundary; node << Anchor::NonWordBoundary.new(token, active_opts)
|
|
105
|
+
when :word_boundary; node << Anchor::WordBoundary.new(token, active_opts)
|
|
106
|
+
else
|
|
107
|
+
raise UnknownTokenError.new('Anchor', token)
|
|
91
108
|
end
|
|
92
|
-
nesting.pop
|
|
93
|
-
yield(node) if block_given?
|
|
94
|
-
self.node = nesting.last
|
|
95
|
-
self.node = node.last if node.last.is_a?(SequenceOperation)
|
|
96
109
|
end
|
|
97
110
|
|
|
98
|
-
def
|
|
99
|
-
|
|
100
|
-
|
|
111
|
+
def group(token)
|
|
112
|
+
case token.token
|
|
113
|
+
when :options, :options_switch
|
|
114
|
+
options_group(token)
|
|
115
|
+
when :close
|
|
116
|
+
close_group
|
|
117
|
+
when :comment
|
|
118
|
+
node << Group::Comment.new(token, active_opts)
|
|
119
|
+
else
|
|
120
|
+
open_group(token)
|
|
121
|
+
end
|
|
101
122
|
end
|
|
102
123
|
|
|
103
|
-
|
|
104
|
-
|
|
124
|
+
MOD_FLAGS = %w[i m x].map(&:to_sym)
|
|
125
|
+
ENC_FLAGS = %w[a d u].map(&:to_sym)
|
|
105
126
|
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
when :escape; escape(token)
|
|
111
|
-
when :group; group(token)
|
|
112
|
-
when :assertion; group(token)
|
|
113
|
-
when :set; set(token)
|
|
114
|
-
when :type; type(token)
|
|
115
|
-
when :backref; backref(token)
|
|
116
|
-
when :conditional; conditional(token)
|
|
117
|
-
when :keep; keep(token)
|
|
118
|
-
|
|
119
|
-
when :posixclass, :nonposixclass
|
|
120
|
-
posixclass(token)
|
|
121
|
-
when :property, :nonproperty
|
|
122
|
-
property(token)
|
|
123
|
-
|
|
124
|
-
when :literal
|
|
125
|
-
node << Literal.new(token, active_opts)
|
|
126
|
-
when :free_space
|
|
127
|
-
free_space(token)
|
|
127
|
+
def options_group(token)
|
|
128
|
+
positive, negative = token.text.split('-', 2)
|
|
129
|
+
negative ||= ''
|
|
130
|
+
self.switching_options = token.token.equal?(:options_switch)
|
|
128
131
|
|
|
129
|
-
|
|
130
|
-
|
|
132
|
+
opt_changes = {}
|
|
133
|
+
new_active_opts = active_opts.dup
|
|
134
|
+
|
|
135
|
+
MOD_FLAGS.each do |flag|
|
|
136
|
+
if positive.include?(flag.to_s)
|
|
137
|
+
opt_changes[flag] = new_active_opts[flag] = true
|
|
138
|
+
end
|
|
139
|
+
if negative.include?(flag.to_s)
|
|
140
|
+
opt_changes[flag] = false
|
|
141
|
+
new_active_opts.delete(flag)
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
if (enc_flag = positive.reverse[/[adu]/])
|
|
146
|
+
enc_flag = enc_flag.to_sym
|
|
147
|
+
(ENC_FLAGS - [enc_flag]).each do |other|
|
|
148
|
+
opt_changes[other] = false if new_active_opts[other]
|
|
149
|
+
new_active_opts.delete(other)
|
|
150
|
+
end
|
|
151
|
+
opt_changes[enc_flag] = new_active_opts[enc_flag] = true
|
|
131
152
|
end
|
|
153
|
+
|
|
154
|
+
options_stack << new_active_opts
|
|
155
|
+
|
|
156
|
+
options_group = Group::Options.new(token, active_opts)
|
|
157
|
+
options_group.option_changes = opt_changes
|
|
158
|
+
|
|
159
|
+
nest(options_group)
|
|
132
160
|
end
|
|
133
161
|
|
|
134
|
-
def
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
162
|
+
def open_group(token)
|
|
163
|
+
group_class =
|
|
164
|
+
case token.token
|
|
165
|
+
when :absence; Group::Absence
|
|
166
|
+
when :atomic; Group::Atomic
|
|
167
|
+
when :capture; Group::Capture
|
|
168
|
+
when :named; Group::Named
|
|
169
|
+
when :passive; Group::Passive
|
|
170
|
+
|
|
171
|
+
when :lookahead; Assertion::Lookahead
|
|
172
|
+
when :lookbehind; Assertion::Lookbehind
|
|
173
|
+
when :nlookahead; Assertion::NegativeLookahead
|
|
174
|
+
when :nlookbehind; Assertion::NegativeLookbehind
|
|
175
|
+
|
|
176
|
+
else
|
|
177
|
+
raise UnknownTokenError.new('Group type open', token)
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
group = group_class.new(token, active_opts)
|
|
181
|
+
|
|
182
|
+
if group.capturing?
|
|
183
|
+
group.number = total_captured_group_count + 1
|
|
184
|
+
group.number_at_level = captured_group_count_at_level + 1
|
|
185
|
+
count_captured_group
|
|
150
186
|
end
|
|
187
|
+
|
|
188
|
+
# Push the active options to the stack again. This way we can simply pop the
|
|
189
|
+
# stack for any group we close, no matter if it had its own options or not.
|
|
190
|
+
options_stack << active_opts
|
|
191
|
+
|
|
192
|
+
nest(group)
|
|
151
193
|
end
|
|
152
194
|
|
|
153
|
-
def
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
195
|
+
def total_captured_group_count
|
|
196
|
+
captured_group_counts.values.reduce(0, :+)
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def captured_group_count_at_level
|
|
200
|
+
captured_group_counts[node.level]
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
def count_captured_group
|
|
204
|
+
captured_group_counts[node.level] += 1
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
def close_group
|
|
208
|
+
options_stack.pop unless switching_options
|
|
209
|
+
self.switching_options = false
|
|
210
|
+
decrease_nesting
|
|
211
|
+
end
|
|
212
|
+
|
|
213
|
+
def decrease_nesting
|
|
214
|
+
while nesting.last.is_a?(SequenceOperation)
|
|
215
|
+
nesting.pop
|
|
216
|
+
self.node = nesting.last
|
|
161
217
|
end
|
|
218
|
+
nesting.pop
|
|
219
|
+
yield(node) if block_given?
|
|
220
|
+
self.node = nesting.last
|
|
221
|
+
self.node = node.last if node.last.is_a?(SequenceOperation)
|
|
162
222
|
end
|
|
163
223
|
|
|
164
224
|
def backref(token)
|
|
@@ -188,31 +248,9 @@ class Regexp::Parser
|
|
|
188
248
|
end
|
|
189
249
|
end
|
|
190
250
|
|
|
191
|
-
def
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
node << CharacterType::Digit.new(token, active_opts)
|
|
195
|
-
when :nondigit
|
|
196
|
-
node << CharacterType::NonDigit.new(token, active_opts)
|
|
197
|
-
when :hex
|
|
198
|
-
node << CharacterType::Hex.new(token, active_opts)
|
|
199
|
-
when :nonhex
|
|
200
|
-
node << CharacterType::NonHex.new(token, active_opts)
|
|
201
|
-
when :space
|
|
202
|
-
node << CharacterType::Space.new(token, active_opts)
|
|
203
|
-
when :nonspace
|
|
204
|
-
node << CharacterType::NonSpace.new(token, active_opts)
|
|
205
|
-
when :word
|
|
206
|
-
node << CharacterType::Word.new(token, active_opts)
|
|
207
|
-
when :nonword
|
|
208
|
-
node << CharacterType::NonWord.new(token, active_opts)
|
|
209
|
-
when :linebreak
|
|
210
|
-
node << CharacterType::Linebreak.new(token, active_opts)
|
|
211
|
-
when :xgrapheme
|
|
212
|
-
node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
|
213
|
-
else
|
|
214
|
-
raise UnknownTokenError.new('CharacterType', token)
|
|
215
|
-
end
|
|
251
|
+
def assign_effective_number(exp)
|
|
252
|
+
exp.effective_number =
|
|
253
|
+
exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
|
|
216
254
|
end
|
|
217
255
|
|
|
218
256
|
def conditional(token)
|
|
@@ -240,11 +278,118 @@ class Regexp::Parser
|
|
|
240
278
|
end
|
|
241
279
|
end
|
|
242
280
|
|
|
281
|
+
def nest_conditional(exp)
|
|
282
|
+
conditional_nesting.push(exp)
|
|
283
|
+
nest(exp)
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
def nest(exp)
|
|
287
|
+
nesting.push(exp)
|
|
288
|
+
node << exp
|
|
289
|
+
update_transplanted_subtree(exp, node)
|
|
290
|
+
self.node = exp
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
# subtrees are transplanted to build Alternations, Intersections, Ranges
|
|
294
|
+
def update_transplanted_subtree(exp, new_parent)
|
|
295
|
+
exp.nesting_level = new_parent.nesting_level + 1
|
|
296
|
+
exp.respond_to?(:each) &&
|
|
297
|
+
exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
def escape(token)
|
|
301
|
+
case token.token
|
|
302
|
+
|
|
303
|
+
when :backspace; node << EscapeSequence::Backspace.new(token, active_opts)
|
|
304
|
+
|
|
305
|
+
when :escape; node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
|
306
|
+
when :bell; node << EscapeSequence::Bell.new(token, active_opts)
|
|
307
|
+
when :form_feed; node << EscapeSequence::FormFeed.new(token, active_opts)
|
|
308
|
+
when :newline; node << EscapeSequence::Newline.new(token, active_opts)
|
|
309
|
+
when :carriage; node << EscapeSequence::Return.new(token, active_opts)
|
|
310
|
+
when :tab; node << EscapeSequence::Tab.new(token, active_opts)
|
|
311
|
+
when :vertical_tab; node << EscapeSequence::VerticalTab.new(token, active_opts)
|
|
312
|
+
|
|
313
|
+
when :codepoint; node << EscapeSequence::Codepoint.new(token, active_opts)
|
|
314
|
+
when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
|
|
315
|
+
when :hex; node << EscapeSequence::Hex.new(token, active_opts)
|
|
316
|
+
when :octal; node << EscapeSequence::Octal.new(token, active_opts)
|
|
317
|
+
|
|
318
|
+
when :control
|
|
319
|
+
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
|
320
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
|
321
|
+
else
|
|
322
|
+
node << EscapeSequence::Control.new(token, active_opts)
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
when :meta_sequence
|
|
326
|
+
if token.text =~ /\A\\M-\\[Cc]/
|
|
327
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
|
328
|
+
else
|
|
329
|
+
node << EscapeSequence::Meta.new(token, active_opts)
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
else
|
|
333
|
+
# treating everything else as a literal
|
|
334
|
+
# TODO: maybe split this up a bit more in v3.0.0?
|
|
335
|
+
# E.g. escaped quantifiers or set meta chars are not the same
|
|
336
|
+
# as stuff that would be a literal even without the backslash.
|
|
337
|
+
# Right now, they all end up here.
|
|
338
|
+
node << EscapeSequence::Literal.new(token, active_opts)
|
|
339
|
+
end
|
|
340
|
+
end
|
|
341
|
+
|
|
342
|
+
def free_space(token)
|
|
343
|
+
case token.token
|
|
344
|
+
when :comment
|
|
345
|
+
node << Comment.new(token, active_opts)
|
|
346
|
+
when :whitespace
|
|
347
|
+
if node.last.is_a?(WhiteSpace)
|
|
348
|
+
node.last.merge(WhiteSpace.new(token, active_opts))
|
|
349
|
+
else
|
|
350
|
+
node << WhiteSpace.new(token, active_opts)
|
|
351
|
+
end
|
|
352
|
+
else
|
|
353
|
+
raise UnknownTokenError.new('FreeSpace', token)
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
|
|
357
|
+
def keep(token)
|
|
358
|
+
node << Keep::Mark.new(token, active_opts)
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
def literal(token)
|
|
362
|
+
node << Literal.new(token, active_opts)
|
|
363
|
+
end
|
|
364
|
+
|
|
365
|
+
def meta(token)
|
|
366
|
+
case token.token
|
|
367
|
+
when :dot
|
|
368
|
+
node << CharacterType::Any.new(token, active_opts)
|
|
369
|
+
when :alternation
|
|
370
|
+
sequence_operation(Alternation, token)
|
|
371
|
+
else
|
|
372
|
+
raise UnknownTokenError.new('Meta', token)
|
|
373
|
+
end
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
def sequence_operation(klass, token)
|
|
377
|
+
unless node.is_a?(klass)
|
|
378
|
+
operator = klass.new(token, active_opts)
|
|
379
|
+
sequence = operator.add_sequence(active_opts)
|
|
380
|
+
sequence.expressions = node.expressions
|
|
381
|
+
node.expressions = []
|
|
382
|
+
nest(operator)
|
|
383
|
+
end
|
|
384
|
+
node.add_sequence(active_opts)
|
|
385
|
+
end
|
|
386
|
+
|
|
243
387
|
def posixclass(token)
|
|
244
388
|
node << PosixClass.new(token, active_opts)
|
|
245
389
|
end
|
|
246
390
|
|
|
247
391
|
include Regexp::Expression::UnicodeProperty
|
|
392
|
+
UPTokens = Regexp::Syntax::Token::UnicodeProperty
|
|
248
393
|
|
|
249
394
|
def property(token)
|
|
250
395
|
case token.token
|
|
@@ -316,127 +461,20 @@ class Regexp::Parser
|
|
|
316
461
|
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
|
317
462
|
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
|
318
463
|
|
|
319
|
-
when *
|
|
320
|
-
node <<
|
|
321
|
-
|
|
322
|
-
when *
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
when *Token::UnicodeProperty::Emoji
|
|
326
|
-
node << Emoji.new(token, active_opts)
|
|
327
|
-
|
|
328
|
-
when *Token::UnicodeProperty::Script
|
|
329
|
-
node << Script.new(token, active_opts)
|
|
330
|
-
|
|
331
|
-
when *Token::UnicodeProperty::UnicodeBlock
|
|
332
|
-
node << Block.new(token, active_opts)
|
|
464
|
+
when *UPTokens::Age; node << Age.new(token, active_opts)
|
|
465
|
+
when *UPTokens::Derived; node << Derived.new(token, active_opts)
|
|
466
|
+
when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
|
|
467
|
+
when *UPTokens::Script; node << Script.new(token, active_opts)
|
|
468
|
+
when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
|
|
333
469
|
|
|
334
470
|
else
|
|
335
471
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
|
336
472
|
end
|
|
337
473
|
end
|
|
338
474
|
|
|
339
|
-
def anchor(token)
|
|
340
|
-
case token.token
|
|
341
|
-
when :bol
|
|
342
|
-
node << Anchor::BeginningOfLine.new(token, active_opts)
|
|
343
|
-
when :eol
|
|
344
|
-
node << Anchor::EndOfLine.new(token, active_opts)
|
|
345
|
-
when :bos
|
|
346
|
-
node << Anchor::BOS.new(token, active_opts)
|
|
347
|
-
when :eos
|
|
348
|
-
node << Anchor::EOS.new(token, active_opts)
|
|
349
|
-
when :eos_ob_eol
|
|
350
|
-
node << Anchor::EOSobEOL.new(token, active_opts)
|
|
351
|
-
when :word_boundary
|
|
352
|
-
node << Anchor::WordBoundary.new(token, active_opts)
|
|
353
|
-
when :nonword_boundary
|
|
354
|
-
node << Anchor::NonWordBoundary.new(token, active_opts)
|
|
355
|
-
when :match_start
|
|
356
|
-
node << Anchor::MatchStart.new(token, active_opts)
|
|
357
|
-
else
|
|
358
|
-
raise UnknownTokenError.new('Anchor', token)
|
|
359
|
-
end
|
|
360
|
-
end
|
|
361
|
-
|
|
362
|
-
def escape(token)
|
|
363
|
-
case token.token
|
|
364
|
-
|
|
365
|
-
when :backspace
|
|
366
|
-
node << EscapeSequence::Backspace.new(token, active_opts)
|
|
367
|
-
|
|
368
|
-
when :escape
|
|
369
|
-
node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
|
370
|
-
when :bell
|
|
371
|
-
node << EscapeSequence::Bell.new(token, active_opts)
|
|
372
|
-
when :form_feed
|
|
373
|
-
node << EscapeSequence::FormFeed.new(token, active_opts)
|
|
374
|
-
when :newline
|
|
375
|
-
node << EscapeSequence::Newline.new(token, active_opts)
|
|
376
|
-
when :carriage
|
|
377
|
-
node << EscapeSequence::Return.new(token, active_opts)
|
|
378
|
-
when :tab
|
|
379
|
-
node << EscapeSequence::Tab.new(token, active_opts)
|
|
380
|
-
when :vertical_tab
|
|
381
|
-
node << EscapeSequence::VerticalTab.new(token, active_opts)
|
|
382
|
-
|
|
383
|
-
when :hex
|
|
384
|
-
node << EscapeSequence::Hex.new(token, active_opts)
|
|
385
|
-
when :octal
|
|
386
|
-
node << EscapeSequence::Octal.new(token, active_opts)
|
|
387
|
-
when :codepoint
|
|
388
|
-
node << EscapeSequence::Codepoint.new(token, active_opts)
|
|
389
|
-
when :codepoint_list
|
|
390
|
-
node << EscapeSequence::CodepointList.new(token, active_opts)
|
|
391
|
-
|
|
392
|
-
when :control
|
|
393
|
-
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
|
394
|
-
node << EscapeSequence::MetaControl.new(token, active_opts)
|
|
395
|
-
else
|
|
396
|
-
node << EscapeSequence::Control.new(token, active_opts)
|
|
397
|
-
end
|
|
398
|
-
|
|
399
|
-
when :meta_sequence
|
|
400
|
-
if token.text =~ /\A\\M-\\[Cc]/
|
|
401
|
-
node << EscapeSequence::MetaControl.new(token, active_opts)
|
|
402
|
-
else
|
|
403
|
-
node << EscapeSequence::Meta.new(token, active_opts)
|
|
404
|
-
end
|
|
405
|
-
|
|
406
|
-
else
|
|
407
|
-
# treating everything else as a literal
|
|
408
|
-
node << EscapeSequence::Literal.new(token, active_opts)
|
|
409
|
-
end
|
|
410
|
-
end
|
|
411
|
-
|
|
412
|
-
def keep(token)
|
|
413
|
-
node << Keep::Mark.new(token, active_opts)
|
|
414
|
-
end
|
|
415
|
-
|
|
416
|
-
def free_space(token)
|
|
417
|
-
case token.token
|
|
418
|
-
when :comment
|
|
419
|
-
node << Comment.new(token, active_opts)
|
|
420
|
-
when :whitespace
|
|
421
|
-
if node.last.is_a?(WhiteSpace)
|
|
422
|
-
node.last.merge(WhiteSpace.new(token, active_opts))
|
|
423
|
-
else
|
|
424
|
-
node << WhiteSpace.new(token, active_opts)
|
|
425
|
-
end
|
|
426
|
-
else
|
|
427
|
-
raise UnknownTokenError.new('FreeSpace', token)
|
|
428
|
-
end
|
|
429
|
-
end
|
|
430
|
-
|
|
431
475
|
def quantifier(token)
|
|
432
|
-
|
|
433
|
-
target_node
|
|
434
|
-
while target_node.is_a?(FreeSpace)
|
|
435
|
-
target_node = node.expressions[offset -= 1]
|
|
436
|
-
end
|
|
437
|
-
|
|
438
|
-
target_node || raise(ArgumentError, 'No valid target found for '\
|
|
439
|
-
"'#{token.text}' ")
|
|
476
|
+
target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
|
477
|
+
target_node or raise ParserError, "No valid target found for '#{token.text}'"
|
|
440
478
|
|
|
441
479
|
# in case of chained quantifiers, wrap target in an implicit passive group
|
|
442
480
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
|
@@ -456,7 +494,7 @@ class Regexp::Parser
|
|
|
456
494
|
new_group.implicit = true
|
|
457
495
|
new_group << target_node
|
|
458
496
|
increase_level(target_node)
|
|
459
|
-
node.expressions[
|
|
497
|
+
node.expressions[node.expressions.index(target_node)] = new_group
|
|
460
498
|
target_node = new_group
|
|
461
499
|
end
|
|
462
500
|
|
|
@@ -517,100 +555,16 @@ class Regexp::Parser
|
|
|
517
555
|
target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
|
|
518
556
|
end
|
|
519
557
|
|
|
520
|
-
def
|
|
521
|
-
case token.token
|
|
522
|
-
when :options, :options_switch
|
|
523
|
-
options_group(token)
|
|
524
|
-
when :close
|
|
525
|
-
close_group
|
|
526
|
-
when :comment
|
|
527
|
-
node << Group::Comment.new(token, active_opts)
|
|
528
|
-
else
|
|
529
|
-
open_group(token)
|
|
530
|
-
end
|
|
531
|
-
end
|
|
532
|
-
|
|
533
|
-
MOD_FLAGS = %w[i m x].map(&:to_sym)
|
|
534
|
-
ENC_FLAGS = %w[a d u].map(&:to_sym)
|
|
535
|
-
|
|
536
|
-
def options_group(token)
|
|
537
|
-
positive, negative = token.text.split('-', 2)
|
|
538
|
-
negative ||= ''
|
|
539
|
-
self.switching_options = token.token.equal?(:options_switch)
|
|
540
|
-
|
|
541
|
-
opt_changes = {}
|
|
542
|
-
new_active_opts = active_opts.dup
|
|
543
|
-
|
|
544
|
-
MOD_FLAGS.each do |flag|
|
|
545
|
-
if positive.include?(flag.to_s)
|
|
546
|
-
opt_changes[flag] = new_active_opts[flag] = true
|
|
547
|
-
end
|
|
548
|
-
if negative.include?(flag.to_s)
|
|
549
|
-
opt_changes[flag] = false
|
|
550
|
-
new_active_opts.delete(flag)
|
|
551
|
-
end
|
|
552
|
-
end
|
|
553
|
-
|
|
554
|
-
if (enc_flag = positive.reverse[/[adu]/])
|
|
555
|
-
enc_flag = enc_flag.to_sym
|
|
556
|
-
(ENC_FLAGS - [enc_flag]).each do |other|
|
|
557
|
-
opt_changes[other] = false if new_active_opts[other]
|
|
558
|
-
new_active_opts.delete(other)
|
|
559
|
-
end
|
|
560
|
-
opt_changes[enc_flag] = new_active_opts[enc_flag] = true
|
|
561
|
-
end
|
|
562
|
-
|
|
563
|
-
options_stack << new_active_opts
|
|
564
|
-
|
|
565
|
-
options_group = Group::Options.new(token, active_opts)
|
|
566
|
-
options_group.option_changes = opt_changes
|
|
567
|
-
|
|
568
|
-
nest(options_group)
|
|
569
|
-
end
|
|
570
|
-
|
|
571
|
-
def open_group(token)
|
|
558
|
+
def set(token)
|
|
572
559
|
case token.token
|
|
573
|
-
when :
|
|
574
|
-
|
|
575
|
-
when :
|
|
576
|
-
|
|
577
|
-
when :
|
|
578
|
-
exp = Group::Named.new(token, active_opts)
|
|
579
|
-
when :capture
|
|
580
|
-
exp = Group::Capture.new(token, active_opts)
|
|
581
|
-
when :absence
|
|
582
|
-
exp = Group::Absence.new(token, active_opts)
|
|
583
|
-
|
|
584
|
-
when :lookahead
|
|
585
|
-
exp = Assertion::Lookahead.new(token, active_opts)
|
|
586
|
-
when :nlookahead
|
|
587
|
-
exp = Assertion::NegativeLookahead.new(token, active_opts)
|
|
588
|
-
when :lookbehind
|
|
589
|
-
exp = Assertion::Lookbehind.new(token, active_opts)
|
|
590
|
-
when :nlookbehind
|
|
591
|
-
exp = Assertion::NegativeLookbehind.new(token, active_opts)
|
|
592
|
-
|
|
560
|
+
when :open; open_set(token)
|
|
561
|
+
when :close; close_set
|
|
562
|
+
when :negate; negate_set
|
|
563
|
+
when :range; range(token)
|
|
564
|
+
when :intersection; intersection(token)
|
|
593
565
|
else
|
|
594
|
-
raise UnknownTokenError.new('
|
|
595
|
-
end
|
|
596
|
-
|
|
597
|
-
if exp.capturing?
|
|
598
|
-
exp.number = total_captured_group_count + 1
|
|
599
|
-
exp.number_at_level = captured_group_count_at_level + 1
|
|
600
|
-
count_captured_group
|
|
566
|
+
raise UnknownTokenError.new('CharacterSet', token)
|
|
601
567
|
end
|
|
602
|
-
|
|
603
|
-
# Push the active options to the stack again. This way we can simply pop the
|
|
604
|
-
# stack for any group we close, no matter if it had its own options or not.
|
|
605
|
-
options_stack << active_opts
|
|
606
|
-
|
|
607
|
-
nest(exp)
|
|
608
|
-
end
|
|
609
|
-
|
|
610
|
-
def close_group
|
|
611
|
-
options_stack.pop unless switching_options
|
|
612
|
-
self.switching_options = false
|
|
613
|
-
decrease_nesting
|
|
614
568
|
end
|
|
615
569
|
|
|
616
570
|
def open_set(token)
|
|
@@ -633,51 +587,45 @@ class Regexp::Parser
|
|
|
633
587
|
nest(exp)
|
|
634
588
|
end
|
|
635
589
|
|
|
636
|
-
def close_completed_character_set_range
|
|
637
|
-
decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
|
|
638
|
-
end
|
|
639
|
-
|
|
640
590
|
def intersection(token)
|
|
641
591
|
sequence_operation(CharacterSet::Intersection, token)
|
|
642
592
|
end
|
|
643
593
|
|
|
644
|
-
def
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
594
|
+
def type(token)
|
|
595
|
+
case token.token
|
|
596
|
+
when :digit; node << CharacterType::Digit.new(token, active_opts)
|
|
597
|
+
when :hex; node << CharacterType::Hex.new(token, active_opts)
|
|
598
|
+
when :linebreak; node << CharacterType::Linebreak.new(token, active_opts)
|
|
599
|
+
when :nondigit; node << CharacterType::NonDigit.new(token, active_opts)
|
|
600
|
+
when :nonhex; node << CharacterType::NonHex.new(token, active_opts)
|
|
601
|
+
when :nonspace; node << CharacterType::NonSpace.new(token, active_opts)
|
|
602
|
+
when :nonword; node << CharacterType::NonWord.new(token, active_opts)
|
|
603
|
+
when :space; node << CharacterType::Space.new(token, active_opts)
|
|
604
|
+
when :word; node << CharacterType::Word.new(token, active_opts)
|
|
605
|
+
when :xgrapheme; node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
|
606
|
+
else
|
|
607
|
+
raise UnknownTokenError.new('CharacterType', token)
|
|
651
608
|
end
|
|
652
|
-
node.add_sequence(active_opts)
|
|
653
|
-
end
|
|
654
|
-
|
|
655
|
-
def active_opts
|
|
656
|
-
options_stack.last
|
|
657
|
-
end
|
|
658
|
-
|
|
659
|
-
def total_captured_group_count
|
|
660
|
-
captured_group_counts.values.reduce(0, :+)
|
|
661
|
-
end
|
|
662
|
-
|
|
663
|
-
def captured_group_count_at_level
|
|
664
|
-
captured_group_counts[node.level]
|
|
665
609
|
end
|
|
666
610
|
|
|
667
|
-
def
|
|
668
|
-
|
|
611
|
+
def close_completed_character_set_range
|
|
612
|
+
decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
|
|
669
613
|
end
|
|
670
614
|
|
|
671
|
-
def
|
|
672
|
-
|
|
673
|
-
exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
|
|
615
|
+
def active_opts
|
|
616
|
+
options_stack.last
|
|
674
617
|
end
|
|
675
618
|
|
|
619
|
+
# Assigns referenced expressions to refering expressions, e.g. if there is
|
|
620
|
+
# an instance of Backreference::Number, its #referenced_expression is set to
|
|
621
|
+
# the instance of Group::Capture that it refers to via its number.
|
|
676
622
|
def assign_referenced_expressions
|
|
677
623
|
targets = {}
|
|
624
|
+
# find all referencable expressions
|
|
678
625
|
root.each_expression do |exp|
|
|
679
626
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
|
680
627
|
end
|
|
628
|
+
# assign them to any refering expressions
|
|
681
629
|
root.each_expression do |exp|
|
|
682
630
|
exp.respond_to?(:reference) &&
|
|
683
631
|
exp.referenced_expression = targets[exp.reference]
|