regexp_parser 2.0.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +66 -0
- data/Gemfile +6 -1
- data/README.md +1 -4
- data/Rakefile +8 -8
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression.rb +3 -2
- data/lib/regexp_parser/expression/classes/backref.rb +5 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
- data/lib/regexp_parser/expression/classes/group.rb +12 -2
- data/lib/regexp_parser/expression/classes/property.rb +1 -1
- data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +3 -9
- data/lib/regexp_parser/expression/subexpression.rb +1 -1
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner.rb +1084 -1230
- data/lib/regexp_parser/scanner/scanner.rl +80 -110
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/syntax/any.rb +3 -3
- data/lib/regexp_parser/syntax/base.rb +1 -1
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/version.rb +1 -1
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/expression/subexpression_spec.rb +1 -1
- data/spec/expression/to_s_spec.rb +28 -36
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/errors_spec.rb +1 -1
- data/spec/parser/quantifiers_spec.rb +1 -0
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/scanner/escapes_spec.rb +2 -1
- data/spec/scanner/groups_spec.rb +10 -1
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +57 -14
- data/spec/spec_helper.rb +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
|
4
|
+
data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
|
7
|
+
data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,71 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Fixed
|
6
|
+
|
7
|
+
- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
|
8
|
+
* thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
|
9
|
+
|
10
|
+
## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
|
+
|
12
|
+
### Added
|
13
|
+
|
14
|
+
- common ancestor for all scanning/parsing/lexing errors
|
15
|
+
* `Regexp::Parser::Error` can now be rescued as a catch-all
|
16
|
+
* the following errors (and their many descendants) now inherit from it:
|
17
|
+
- `Regexp::Expression::Conditional::TooManyBranches`
|
18
|
+
- `Regexp::Parser::ParserError`
|
19
|
+
- `Regexp::Scanner::ScannerError`
|
20
|
+
- `Regexp::Scanner::ValidationError`
|
21
|
+
- `Regexp::Syntax::SyntaxError`
|
22
|
+
* it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
|
23
|
+
* thanks to [sandstrom](https://github.com/sandstrom) for the cue
|
24
|
+
|
25
|
+
### Fixed
|
26
|
+
|
27
|
+
- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
|
28
|
+
* a regression in v2.0.1 had caused them to be scanned as literals
|
29
|
+
- fixed scanning of some backreference and subexpression call edge cases
|
30
|
+
* e.g. `\k<+1>`, `\g<x-1>`
|
31
|
+
- fixed tokenization of some escapes in character sets
|
32
|
+
* `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
|
33
|
+
* all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
|
34
|
+
* if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
|
35
|
+
* the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
|
36
|
+
- fixed handling of control/metacontrol escapes in character sets
|
37
|
+
* e.g. `[\cX]`, `[\M-\C-X]`
|
38
|
+
* they were misread as a bunch of individual literals, escapes, and ranges
|
39
|
+
- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
|
40
|
+
|
41
|
+
## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
42
|
+
|
43
|
+
### Fixed
|
44
|
+
|
45
|
+
- fixed error when scanning some unlikely and redundant but valid charset patterns
|
46
|
+
* e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
|
47
|
+
- fixed ancestry of some error classes related to syntax version lookup
|
48
|
+
* `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
|
49
|
+
* they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
|
50
|
+
|
51
|
+
## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
52
|
+
|
53
|
+
### Fixed
|
54
|
+
|
55
|
+
- fixed `FrozenError` when calling `#to_s` on a frozen `Group::Passive`
|
56
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon)
|
57
|
+
|
58
|
+
## [2.0.1] - 2020-12-20 - [Janosch Müller](mailto:janosch84@gmail.com)
|
59
|
+
|
60
|
+
### Fixed
|
61
|
+
|
62
|
+
- fixed error when scanning some group names
|
63
|
+
* this affected names containing hyphens, digits or multibyte chars, e.g. `/(?<a1>a)/`
|
64
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
65
|
+
- fixed error when scanning hex escapes with just one hex digit
|
66
|
+
* e.g. `/\x0A/` was scanned correctly, but the equivalent `/\xA/` was not
|
67
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
68
|
+
|
3
69
|
## [2.0.0] - 2020-11-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
70
|
|
5
71
|
### Changed
|
data/Gemfile
CHANGED
@@ -3,7 +3,12 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
+
gem 'ice_nine', '~> 0.11.2'
|
6
7
|
gem 'rake', '~> 13.0'
|
7
8
|
gem 'regexp_property_values', '~> 1.0'
|
8
|
-
gem 'rspec', '~> 3.
|
9
|
+
gem 'rspec', '~> 3.10'
|
10
|
+
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'gouteur'
|
12
|
+
gem 'rubocop', '~> 1.7'
|
13
|
+
end
|
9
14
|
end
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
4
|
|
5
5
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
6
|
|
@@ -22,9 +22,6 @@ _For examples of regexp_parser in use, see [Example Projects](#example-projects)
|
|
22
22
|
* Ragel >= 6.0, but only if you want to build the gem or work on the scanner.
|
23
23
|
|
24
24
|
|
25
|
-
_Note: See the .travis.yml file for covered versions._
|
26
|
-
|
27
|
-
|
28
25
|
---
|
29
26
|
## Install
|
30
27
|
|
data/Rakefile
CHANGED
@@ -7,8 +7,8 @@ require 'bundler'
|
|
7
7
|
require 'rubygems/package_task'
|
8
8
|
|
9
9
|
|
10
|
-
RAGEL_SOURCE_DIR = File.
|
11
|
-
RAGEL_OUTPUT_DIR = File.
|
10
|
+
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
11
|
+
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
12
|
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
13
|
|
14
14
|
|
@@ -25,11 +25,11 @@ end
|
|
25
25
|
|
26
26
|
namespace :ragel do
|
27
27
|
desc "Process the ragel source files and output ruby code"
|
28
|
-
task :rb do
|
29
|
-
RAGEL_SOURCE_FILES.each do |
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{
|
28
|
+
task :rb do
|
29
|
+
RAGEL_SOURCE_FILES.each do |source_file|
|
30
|
+
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
31
31
|
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{
|
32
|
+
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
33
33
|
|
34
34
|
contents = File.read(output_file)
|
35
35
|
|
@@ -42,7 +42,7 @@ namespace :ragel do
|
|
42
42
|
end
|
43
43
|
|
44
44
|
desc "Delete the ragel generated source file(s)"
|
45
|
-
task :clean do
|
45
|
+
task :clean do
|
46
46
|
RAGEL_SOURCE_FILES.each do |file|
|
47
47
|
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
48
48
|
end
|
@@ -61,7 +61,7 @@ namespace :props do
|
|
61
61
|
task :update do
|
62
62
|
require 'regexp_property_values'
|
63
63
|
RegexpPropertyValues.update
|
64
|
-
dir = File.
|
64
|
+
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
65
65
|
|
66
66
|
require 'psych'
|
67
67
|
write_hash_to_file = ->(hash, path) do
|
@@ -1,5 +1,6 @@
|
|
1
|
-
|
1
|
+
require 'regexp_parser/error'
|
2
2
|
|
3
|
+
module Regexp::Expression
|
3
4
|
class Base
|
4
5
|
attr_accessor :type, :token
|
5
6
|
attr_accessor :text, :ts
|
@@ -21,7 +22,7 @@ module Regexp::Expression
|
|
21
22
|
self.options = options
|
22
23
|
end
|
23
24
|
|
24
|
-
def
|
25
|
+
def initialize_copy(orig)
|
25
26
|
self.text = (orig.text ? orig.text.dup : nil)
|
26
27
|
self.options = (orig.options ? orig.options.dup : nil)
|
27
28
|
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
@@ -2,6 +2,11 @@ module Regexp::Expression
|
|
2
2
|
module Backreference
|
3
3
|
class Base < Regexp::Expression::Base
|
4
4
|
attr_accessor :referenced_expression
|
5
|
+
|
6
|
+
def initialize_copy(orig)
|
7
|
+
self.referenced_expression = orig.referenced_expression.dup
|
8
|
+
super
|
9
|
+
end
|
5
10
|
end
|
6
11
|
|
7
12
|
class Number < Backreference::Base
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Conditional
|
3
|
-
class TooManyBranches <
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
4
4
|
def initialize
|
5
5
|
super('The conditional expression has more than 2 branches')
|
6
6
|
end
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
15
15
|
ref = text.tr("'<>()", "")
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
17
|
end
|
18
|
+
|
19
|
+
def initialize_copy(orig)
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
21
|
+
super
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -53,6 +58,11 @@ module Regexp::Expression
|
|
53
58
|
def to_s(format = :full)
|
54
59
|
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
55
60
|
end
|
61
|
+
|
62
|
+
def initialize_copy(orig)
|
63
|
+
self.referenced_expression = orig.referenced_expression.dup
|
64
|
+
super
|
65
|
+
end
|
56
66
|
end
|
57
67
|
end
|
58
68
|
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
|
3
3
|
class FreeSpace < Regexp::Expression::Base
|
4
|
-
def quantify(
|
5
|
-
raise
|
4
|
+
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
@@ -13,6 +13,11 @@ module Regexp::Expression
|
|
13
13
|
class Passive < Group::Base
|
14
14
|
attr_writer :implicit
|
15
15
|
|
16
|
+
def initialize(*)
|
17
|
+
@implicit = false
|
18
|
+
super
|
19
|
+
end
|
20
|
+
|
16
21
|
def to_s(format = :full)
|
17
22
|
if implicit?
|
18
23
|
"#{expressions.join}#{quantifier_affix(format)}"
|
@@ -22,7 +27,7 @@ module Regexp::Expression
|
|
22
27
|
end
|
23
28
|
|
24
29
|
def implicit?
|
25
|
-
@implicit
|
30
|
+
@implicit
|
26
31
|
end
|
27
32
|
end
|
28
33
|
|
@@ -30,6 +35,11 @@ module Regexp::Expression
|
|
30
35
|
class Atomic < Group::Base; end
|
31
36
|
class Options < Group::Base
|
32
37
|
attr_accessor :option_changes
|
38
|
+
|
39
|
+
def initialize_copy(orig)
|
40
|
+
self.option_changes = orig.option_changes.dup
|
41
|
+
super
|
42
|
+
end
|
33
43
|
end
|
34
44
|
|
35
45
|
class Capture < Group::Base
|
@@ -48,7 +58,7 @@ module Regexp::Expression
|
|
48
58
|
super
|
49
59
|
end
|
50
60
|
|
51
|
-
def
|
61
|
+
def initialize_copy(orig)
|
52
62
|
@name = orig.name.dup
|
53
63
|
super
|
54
64
|
end
|
@@ -10,7 +10,7 @@ class Regexp::MatchLength
|
|
10
10
|
self.exp_class = exp.class
|
11
11
|
self.min_rep = exp.repetitions.min
|
12
12
|
self.max_rep = exp.repetitions.max
|
13
|
-
if base = opts[:base]
|
13
|
+
if (base = opts[:base])
|
14
14
|
self.base_min = base
|
15
15
|
self.base_max = base
|
16
16
|
self.reify = ->{ '.' * base }
|
@@ -32,7 +32,7 @@ class Regexp::MatchLength
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
def endless_each
|
35
|
+
def endless_each
|
36
36
|
return enum_for(__method__) unless block_given?
|
37
37
|
(min..max).each { |num| yield(num) if include?(num) }
|
38
38
|
end
|
@@ -36,7 +36,7 @@ module Regexp::Expression
|
|
36
36
|
|
37
37
|
# Iterates over the expressions of this expression as an array, passing
|
38
38
|
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false
|
39
|
+
def each_expression(include_self = false)
|
40
40
|
return enum_for(__method__, include_self) unless block_given?
|
41
41
|
|
42
42
|
traverse(include_self) do |event, exp, index|
|
@@ -47,7 +47,7 @@ module Regexp::Expression
|
|
47
47
|
# Returns a new array with the results of calling the given block once
|
48
48
|
# for every expression. If a block is not given, returns an array with
|
49
49
|
# each expression and its level index as an array.
|
50
|
-
def flat_map(include_self = false
|
50
|
+
def flat_map(include_self = false)
|
51
51
|
result = []
|
52
52
|
|
53
53
|
each_expression(include_self) do |exp, index|
|
@@ -41,17 +41,11 @@ module Regexp::Expression
|
|
41
41
|
alias :ts :starts_at
|
42
42
|
|
43
43
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
44
|
-
|
45
|
-
target
|
46
|
-
|
47
|
-
target = expressions[offset -= 1]
|
48
|
-
end
|
49
|
-
|
50
|
-
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
51
|
-
'quantifier')
|
44
|
+
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
45
|
+
target or raise Regexp::Parser::Error,
|
46
|
+
"No valid target found for '#{text}' quantifier"
|
52
47
|
|
53
48
|
target.quantify(token, text, min, max, mode)
|
54
49
|
end
|
55
50
|
end
|
56
|
-
|
57
51
|
end
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
+
require 'regexp_parser/error'
|
1
2
|
require 'regexp_parser/expression'
|
2
3
|
|
3
4
|
class Regexp::Parser
|
4
5
|
include Regexp::Expression
|
5
|
-
include Regexp::Syntax
|
6
6
|
|
7
|
-
class ParserError <
|
7
|
+
class ParserError < Regexp::Parser::Error; end
|
8
8
|
|
9
9
|
class UnknownTokenTypeError < ParserError
|
10
10
|
def initialize(type, token)
|
@@ -70,95 +70,155 @@ class Regexp::Parser
|
|
70
70
|
enabled_options
|
71
71
|
end
|
72
72
|
|
73
|
-
def
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
73
|
+
def parse_token(token)
|
74
|
+
case token.type
|
75
|
+
when :anchor; anchor(token)
|
76
|
+
when :assertion, :group; group(token)
|
77
|
+
when :backref; backref(token)
|
78
|
+
when :conditional; conditional(token)
|
79
|
+
when :escape; escape(token)
|
80
|
+
when :free_space; free_space(token)
|
81
|
+
when :keep; keep(token)
|
82
|
+
when :literal; literal(token)
|
83
|
+
when :meta; meta(token)
|
84
|
+
when :posixclass, :nonposixclass; posixclass(token)
|
85
|
+
when :property, :nonproperty; property(token)
|
86
|
+
when :quantifier; quantifier(token)
|
87
|
+
when :set; set(token)
|
88
|
+
when :type; type(token)
|
89
|
+
else
|
90
|
+
raise UnknownTokenTypeError.new(token.type, token)
|
91
|
+
end
|
79
92
|
|
80
|
-
|
81
|
-
def update_transplanted_subtree(exp, new_parent)
|
82
|
-
exp.nesting_level = new_parent.nesting_level + 1
|
83
|
-
exp.respond_to?(:each) &&
|
84
|
-
exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
|
93
|
+
close_completed_character_set_range
|
85
94
|
end
|
86
95
|
|
87
|
-
def
|
88
|
-
|
89
|
-
|
90
|
-
|
96
|
+
def anchor(token)
|
97
|
+
case token.token
|
98
|
+
when :bol; node << Anchor::BeginningOfLine.new(token, active_opts)
|
99
|
+
when :bos; node << Anchor::BOS.new(token, active_opts)
|
100
|
+
when :eol; node << Anchor::EndOfLine.new(token, active_opts)
|
101
|
+
when :eos; node << Anchor::EOS.new(token, active_opts)
|
102
|
+
when :eos_ob_eol; node << Anchor::EOSobEOL.new(token, active_opts)
|
103
|
+
when :match_start; node << Anchor::MatchStart.new(token, active_opts)
|
104
|
+
when :nonword_boundary; node << Anchor::NonWordBoundary.new(token, active_opts)
|
105
|
+
when :word_boundary; node << Anchor::WordBoundary.new(token, active_opts)
|
106
|
+
else
|
107
|
+
raise UnknownTokenError.new('Anchor', token)
|
91
108
|
end
|
92
|
-
nesting.pop
|
93
|
-
yield(node) if block_given?
|
94
|
-
self.node = nesting.last
|
95
|
-
self.node = node.last if node.last.is_a?(SequenceOperation)
|
96
109
|
end
|
97
110
|
|
98
|
-
def
|
99
|
-
|
100
|
-
|
111
|
+
def group(token)
|
112
|
+
case token.token
|
113
|
+
when :options, :options_switch
|
114
|
+
options_group(token)
|
115
|
+
when :close
|
116
|
+
close_group
|
117
|
+
when :comment
|
118
|
+
node << Group::Comment.new(token, active_opts)
|
119
|
+
else
|
120
|
+
open_group(token)
|
121
|
+
end
|
101
122
|
end
|
102
123
|
|
103
|
-
|
104
|
-
|
124
|
+
MOD_FLAGS = %w[i m x].map(&:to_sym)
|
125
|
+
ENC_FLAGS = %w[a d u].map(&:to_sym)
|
105
126
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
when :escape; escape(token)
|
111
|
-
when :group; group(token)
|
112
|
-
when :assertion; group(token)
|
113
|
-
when :set; set(token)
|
114
|
-
when :type; type(token)
|
115
|
-
when :backref; backref(token)
|
116
|
-
when :conditional; conditional(token)
|
117
|
-
when :keep; keep(token)
|
118
|
-
|
119
|
-
when :posixclass, :nonposixclass
|
120
|
-
posixclass(token)
|
121
|
-
when :property, :nonproperty
|
122
|
-
property(token)
|
123
|
-
|
124
|
-
when :literal
|
125
|
-
node << Literal.new(token, active_opts)
|
126
|
-
when :free_space
|
127
|
-
free_space(token)
|
127
|
+
def options_group(token)
|
128
|
+
positive, negative = token.text.split('-', 2)
|
129
|
+
negative ||= ''
|
130
|
+
self.switching_options = token.token.equal?(:options_switch)
|
128
131
|
|
129
|
-
|
130
|
-
|
132
|
+
opt_changes = {}
|
133
|
+
new_active_opts = active_opts.dup
|
134
|
+
|
135
|
+
MOD_FLAGS.each do |flag|
|
136
|
+
if positive.include?(flag.to_s)
|
137
|
+
opt_changes[flag] = new_active_opts[flag] = true
|
138
|
+
end
|
139
|
+
if negative.include?(flag.to_s)
|
140
|
+
opt_changes[flag] = false
|
141
|
+
new_active_opts.delete(flag)
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
if (enc_flag = positive.reverse[/[adu]/])
|
146
|
+
enc_flag = enc_flag.to_sym
|
147
|
+
(ENC_FLAGS - [enc_flag]).each do |other|
|
148
|
+
opt_changes[other] = false if new_active_opts[other]
|
149
|
+
new_active_opts.delete(other)
|
150
|
+
end
|
151
|
+
opt_changes[enc_flag] = new_active_opts[enc_flag] = true
|
131
152
|
end
|
153
|
+
|
154
|
+
options_stack << new_active_opts
|
155
|
+
|
156
|
+
options_group = Group::Options.new(token, active_opts)
|
157
|
+
options_group.option_changes = opt_changes
|
158
|
+
|
159
|
+
nest(options_group)
|
132
160
|
end
|
133
161
|
|
134
|
-
def
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
162
|
+
def open_group(token)
|
163
|
+
group_class =
|
164
|
+
case token.token
|
165
|
+
when :absence; Group::Absence
|
166
|
+
when :atomic; Group::Atomic
|
167
|
+
when :capture; Group::Capture
|
168
|
+
when :named; Group::Named
|
169
|
+
when :passive; Group::Passive
|
170
|
+
|
171
|
+
when :lookahead; Assertion::Lookahead
|
172
|
+
when :lookbehind; Assertion::Lookbehind
|
173
|
+
when :nlookahead; Assertion::NegativeLookahead
|
174
|
+
when :nlookbehind; Assertion::NegativeLookbehind
|
175
|
+
|
176
|
+
else
|
177
|
+
raise UnknownTokenError.new('Group type open', token)
|
178
|
+
end
|
179
|
+
|
180
|
+
group = group_class.new(token, active_opts)
|
181
|
+
|
182
|
+
if group.capturing?
|
183
|
+
group.number = total_captured_group_count + 1
|
184
|
+
group.number_at_level = captured_group_count_at_level + 1
|
185
|
+
count_captured_group
|
150
186
|
end
|
187
|
+
|
188
|
+
# Push the active options to the stack again. This way we can simply pop the
|
189
|
+
# stack for any group we close, no matter if it had its own options or not.
|
190
|
+
options_stack << active_opts
|
191
|
+
|
192
|
+
nest(group)
|
151
193
|
end
|
152
194
|
|
153
|
-
def
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
195
|
+
def total_captured_group_count
|
196
|
+
captured_group_counts.values.reduce(0, :+)
|
197
|
+
end
|
198
|
+
|
199
|
+
def captured_group_count_at_level
|
200
|
+
captured_group_counts[node.level]
|
201
|
+
end
|
202
|
+
|
203
|
+
def count_captured_group
|
204
|
+
captured_group_counts[node.level] += 1
|
205
|
+
end
|
206
|
+
|
207
|
+
def close_group
|
208
|
+
options_stack.pop unless switching_options
|
209
|
+
self.switching_options = false
|
210
|
+
decrease_nesting
|
211
|
+
end
|
212
|
+
|
213
|
+
def decrease_nesting
|
214
|
+
while nesting.last.is_a?(SequenceOperation)
|
215
|
+
nesting.pop
|
216
|
+
self.node = nesting.last
|
161
217
|
end
|
218
|
+
nesting.pop
|
219
|
+
yield(node) if block_given?
|
220
|
+
self.node = nesting.last
|
221
|
+
self.node = node.last if node.last.is_a?(SequenceOperation)
|
162
222
|
end
|
163
223
|
|
164
224
|
def backref(token)
|
@@ -188,31 +248,9 @@ class Regexp::Parser
|
|
188
248
|
end
|
189
249
|
end
|
190
250
|
|
191
|
-
def
|
192
|
-
|
193
|
-
|
194
|
-
node << CharacterType::Digit.new(token, active_opts)
|
195
|
-
when :nondigit
|
196
|
-
node << CharacterType::NonDigit.new(token, active_opts)
|
197
|
-
when :hex
|
198
|
-
node << CharacterType::Hex.new(token, active_opts)
|
199
|
-
when :nonhex
|
200
|
-
node << CharacterType::NonHex.new(token, active_opts)
|
201
|
-
when :space
|
202
|
-
node << CharacterType::Space.new(token, active_opts)
|
203
|
-
when :nonspace
|
204
|
-
node << CharacterType::NonSpace.new(token, active_opts)
|
205
|
-
when :word
|
206
|
-
node << CharacterType::Word.new(token, active_opts)
|
207
|
-
when :nonword
|
208
|
-
node << CharacterType::NonWord.new(token, active_opts)
|
209
|
-
when :linebreak
|
210
|
-
node << CharacterType::Linebreak.new(token, active_opts)
|
211
|
-
when :xgrapheme
|
212
|
-
node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
213
|
-
else
|
214
|
-
raise UnknownTokenError.new('CharacterType', token)
|
215
|
-
end
|
251
|
+
def assign_effective_number(exp)
|
252
|
+
exp.effective_number =
|
253
|
+
exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
|
216
254
|
end
|
217
255
|
|
218
256
|
def conditional(token)
|
@@ -240,11 +278,118 @@ class Regexp::Parser
|
|
240
278
|
end
|
241
279
|
end
|
242
280
|
|
281
|
+
def nest_conditional(exp)
|
282
|
+
conditional_nesting.push(exp)
|
283
|
+
nest(exp)
|
284
|
+
end
|
285
|
+
|
286
|
+
def nest(exp)
|
287
|
+
nesting.push(exp)
|
288
|
+
node << exp
|
289
|
+
update_transplanted_subtree(exp, node)
|
290
|
+
self.node = exp
|
291
|
+
end
|
292
|
+
|
293
|
+
# subtrees are transplanted to build Alternations, Intersections, Ranges
|
294
|
+
def update_transplanted_subtree(exp, new_parent)
|
295
|
+
exp.nesting_level = new_parent.nesting_level + 1
|
296
|
+
exp.respond_to?(:each) &&
|
297
|
+
exp.each { |subexp| update_transplanted_subtree(subexp, exp) }
|
298
|
+
end
|
299
|
+
|
300
|
+
def escape(token)
|
301
|
+
case token.token
|
302
|
+
|
303
|
+
when :backspace; node << EscapeSequence::Backspace.new(token, active_opts)
|
304
|
+
|
305
|
+
when :escape; node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
306
|
+
when :bell; node << EscapeSequence::Bell.new(token, active_opts)
|
307
|
+
when :form_feed; node << EscapeSequence::FormFeed.new(token, active_opts)
|
308
|
+
when :newline; node << EscapeSequence::Newline.new(token, active_opts)
|
309
|
+
when :carriage; node << EscapeSequence::Return.new(token, active_opts)
|
310
|
+
when :tab; node << EscapeSequence::Tab.new(token, active_opts)
|
311
|
+
when :vertical_tab; node << EscapeSequence::VerticalTab.new(token, active_opts)
|
312
|
+
|
313
|
+
when :codepoint; node << EscapeSequence::Codepoint.new(token, active_opts)
|
314
|
+
when :codepoint_list; node << EscapeSequence::CodepointList.new(token, active_opts)
|
315
|
+
when :hex; node << EscapeSequence::Hex.new(token, active_opts)
|
316
|
+
when :octal; node << EscapeSequence::Octal.new(token, active_opts)
|
317
|
+
|
318
|
+
when :control
|
319
|
+
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
320
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
321
|
+
else
|
322
|
+
node << EscapeSequence::Control.new(token, active_opts)
|
323
|
+
end
|
324
|
+
|
325
|
+
when :meta_sequence
|
326
|
+
if token.text =~ /\A\\M-\\[Cc]/
|
327
|
+
node << EscapeSequence::MetaControl.new(token, active_opts)
|
328
|
+
else
|
329
|
+
node << EscapeSequence::Meta.new(token, active_opts)
|
330
|
+
end
|
331
|
+
|
332
|
+
else
|
333
|
+
# treating everything else as a literal
|
334
|
+
# TODO: maybe split this up a bit more in v3.0.0?
|
335
|
+
# E.g. escaped quantifiers or set meta chars are not the same
|
336
|
+
# as stuff that would be a literal even without the backslash.
|
337
|
+
# Right now, they all end up here.
|
338
|
+
node << EscapeSequence::Literal.new(token, active_opts)
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
def free_space(token)
|
343
|
+
case token.token
|
344
|
+
when :comment
|
345
|
+
node << Comment.new(token, active_opts)
|
346
|
+
when :whitespace
|
347
|
+
if node.last.is_a?(WhiteSpace)
|
348
|
+
node.last.merge(WhiteSpace.new(token, active_opts))
|
349
|
+
else
|
350
|
+
node << WhiteSpace.new(token, active_opts)
|
351
|
+
end
|
352
|
+
else
|
353
|
+
raise UnknownTokenError.new('FreeSpace', token)
|
354
|
+
end
|
355
|
+
end
|
356
|
+
|
357
|
+
def keep(token)
|
358
|
+
node << Keep::Mark.new(token, active_opts)
|
359
|
+
end
|
360
|
+
|
361
|
+
def literal(token)
|
362
|
+
node << Literal.new(token, active_opts)
|
363
|
+
end
|
364
|
+
|
365
|
+
def meta(token)
|
366
|
+
case token.token
|
367
|
+
when :dot
|
368
|
+
node << CharacterType::Any.new(token, active_opts)
|
369
|
+
when :alternation
|
370
|
+
sequence_operation(Alternation, token)
|
371
|
+
else
|
372
|
+
raise UnknownTokenError.new('Meta', token)
|
373
|
+
end
|
374
|
+
end
|
375
|
+
|
376
|
+
def sequence_operation(klass, token)
|
377
|
+
unless node.is_a?(klass)
|
378
|
+
operator = klass.new(token, active_opts)
|
379
|
+
sequence = operator.add_sequence(active_opts)
|
380
|
+
sequence.expressions = node.expressions
|
381
|
+
node.expressions = []
|
382
|
+
nest(operator)
|
383
|
+
end
|
384
|
+
node.add_sequence(active_opts)
|
385
|
+
end
|
386
|
+
|
243
387
|
def posixclass(token)
|
244
388
|
node << PosixClass.new(token, active_opts)
|
245
389
|
end
|
246
390
|
|
247
391
|
include Regexp::Expression::UnicodeProperty
|
392
|
+
UPTokens = Regexp::Syntax::Token::UnicodeProperty
|
248
393
|
|
249
394
|
def property(token)
|
250
395
|
case token.token
|
@@ -316,127 +461,20 @@ class Regexp::Parser
|
|
316
461
|
when :private_use; node << Codepoint::PrivateUse.new(token, active_opts)
|
317
462
|
when :unassigned; node << Codepoint::Unassigned.new(token, active_opts)
|
318
463
|
|
319
|
-
when *
|
320
|
-
node <<
|
321
|
-
|
322
|
-
when *
|
323
|
-
|
324
|
-
|
325
|
-
when *Token::UnicodeProperty::Emoji
|
326
|
-
node << Emoji.new(token, active_opts)
|
327
|
-
|
328
|
-
when *Token::UnicodeProperty::Script
|
329
|
-
node << Script.new(token, active_opts)
|
330
|
-
|
331
|
-
when *Token::UnicodeProperty::UnicodeBlock
|
332
|
-
node << Block.new(token, active_opts)
|
464
|
+
when *UPTokens::Age; node << Age.new(token, active_opts)
|
465
|
+
when *UPTokens::Derived; node << Derived.new(token, active_opts)
|
466
|
+
when *UPTokens::Emoji; node << Emoji.new(token, active_opts)
|
467
|
+
when *UPTokens::Script; node << Script.new(token, active_opts)
|
468
|
+
when *UPTokens::UnicodeBlock; node << Block.new(token, active_opts)
|
333
469
|
|
334
470
|
else
|
335
471
|
raise UnknownTokenError.new('UnicodeProperty', token)
|
336
472
|
end
|
337
473
|
end
|
338
474
|
|
339
|
-
def anchor(token)
|
340
|
-
case token.token
|
341
|
-
when :bol
|
342
|
-
node << Anchor::BeginningOfLine.new(token, active_opts)
|
343
|
-
when :eol
|
344
|
-
node << Anchor::EndOfLine.new(token, active_opts)
|
345
|
-
when :bos
|
346
|
-
node << Anchor::BOS.new(token, active_opts)
|
347
|
-
when :eos
|
348
|
-
node << Anchor::EOS.new(token, active_opts)
|
349
|
-
when :eos_ob_eol
|
350
|
-
node << Anchor::EOSobEOL.new(token, active_opts)
|
351
|
-
when :word_boundary
|
352
|
-
node << Anchor::WordBoundary.new(token, active_opts)
|
353
|
-
when :nonword_boundary
|
354
|
-
node << Anchor::NonWordBoundary.new(token, active_opts)
|
355
|
-
when :match_start
|
356
|
-
node << Anchor::MatchStart.new(token, active_opts)
|
357
|
-
else
|
358
|
-
raise UnknownTokenError.new('Anchor', token)
|
359
|
-
end
|
360
|
-
end
|
361
|
-
|
362
|
-
def escape(token)
|
363
|
-
case token.token
|
364
|
-
|
365
|
-
when :backspace
|
366
|
-
node << EscapeSequence::Backspace.new(token, active_opts)
|
367
|
-
|
368
|
-
when :escape
|
369
|
-
node << EscapeSequence::AsciiEscape.new(token, active_opts)
|
370
|
-
when :bell
|
371
|
-
node << EscapeSequence::Bell.new(token, active_opts)
|
372
|
-
when :form_feed
|
373
|
-
node << EscapeSequence::FormFeed.new(token, active_opts)
|
374
|
-
when :newline
|
375
|
-
node << EscapeSequence::Newline.new(token, active_opts)
|
376
|
-
when :carriage
|
377
|
-
node << EscapeSequence::Return.new(token, active_opts)
|
378
|
-
when :tab
|
379
|
-
node << EscapeSequence::Tab.new(token, active_opts)
|
380
|
-
when :vertical_tab
|
381
|
-
node << EscapeSequence::VerticalTab.new(token, active_opts)
|
382
|
-
|
383
|
-
when :hex
|
384
|
-
node << EscapeSequence::Hex.new(token, active_opts)
|
385
|
-
when :octal
|
386
|
-
node << EscapeSequence::Octal.new(token, active_opts)
|
387
|
-
when :codepoint
|
388
|
-
node << EscapeSequence::Codepoint.new(token, active_opts)
|
389
|
-
when :codepoint_list
|
390
|
-
node << EscapeSequence::CodepointList.new(token, active_opts)
|
391
|
-
|
392
|
-
when :control
|
393
|
-
if token.text =~ /\A(?:\\C-\\M|\\c\\M)/
|
394
|
-
node << EscapeSequence::MetaControl.new(token, active_opts)
|
395
|
-
else
|
396
|
-
node << EscapeSequence::Control.new(token, active_opts)
|
397
|
-
end
|
398
|
-
|
399
|
-
when :meta_sequence
|
400
|
-
if token.text =~ /\A\\M-\\[Cc]/
|
401
|
-
node << EscapeSequence::MetaControl.new(token, active_opts)
|
402
|
-
else
|
403
|
-
node << EscapeSequence::Meta.new(token, active_opts)
|
404
|
-
end
|
405
|
-
|
406
|
-
else
|
407
|
-
# treating everything else as a literal
|
408
|
-
node << EscapeSequence::Literal.new(token, active_opts)
|
409
|
-
end
|
410
|
-
end
|
411
|
-
|
412
|
-
def keep(token)
|
413
|
-
node << Keep::Mark.new(token, active_opts)
|
414
|
-
end
|
415
|
-
|
416
|
-
def free_space(token)
|
417
|
-
case token.token
|
418
|
-
when :comment
|
419
|
-
node << Comment.new(token, active_opts)
|
420
|
-
when :whitespace
|
421
|
-
if node.last.is_a?(WhiteSpace)
|
422
|
-
node.last.merge(WhiteSpace.new(token, active_opts))
|
423
|
-
else
|
424
|
-
node << WhiteSpace.new(token, active_opts)
|
425
|
-
end
|
426
|
-
else
|
427
|
-
raise UnknownTokenError.new('FreeSpace', token)
|
428
|
-
end
|
429
|
-
end
|
430
|
-
|
431
475
|
def quantifier(token)
|
432
|
-
|
433
|
-
target_node
|
434
|
-
while target_node.is_a?(FreeSpace)
|
435
|
-
target_node = node.expressions[offset -= 1]
|
436
|
-
end
|
437
|
-
|
438
|
-
target_node || raise(ArgumentError, 'No valid target found for '\
|
439
|
-
"'#{token.text}' ")
|
476
|
+
target_node = node.expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
477
|
+
target_node or raise ParserError, "No valid target found for '#{token.text}'"
|
440
478
|
|
441
479
|
# in case of chained quantifiers, wrap target in an implicit passive group
|
442
480
|
# description of the problem: https://github.com/ammar/regexp_parser/issues/3
|
@@ -456,7 +494,7 @@ class Regexp::Parser
|
|
456
494
|
new_group.implicit = true
|
457
495
|
new_group << target_node
|
458
496
|
increase_level(target_node)
|
459
|
-
node.expressions[
|
497
|
+
node.expressions[node.expressions.index(target_node)] = new_group
|
460
498
|
target_node = new_group
|
461
499
|
end
|
462
500
|
|
@@ -517,100 +555,16 @@ class Regexp::Parser
|
|
517
555
|
target_node.quantify(:interval, text, min.to_i, max.to_i, mode)
|
518
556
|
end
|
519
557
|
|
520
|
-
def
|
521
|
-
case token.token
|
522
|
-
when :options, :options_switch
|
523
|
-
options_group(token)
|
524
|
-
when :close
|
525
|
-
close_group
|
526
|
-
when :comment
|
527
|
-
node << Group::Comment.new(token, active_opts)
|
528
|
-
else
|
529
|
-
open_group(token)
|
530
|
-
end
|
531
|
-
end
|
532
|
-
|
533
|
-
MOD_FLAGS = %w[i m x].map(&:to_sym)
|
534
|
-
ENC_FLAGS = %w[a d u].map(&:to_sym)
|
535
|
-
|
536
|
-
def options_group(token)
|
537
|
-
positive, negative = token.text.split('-', 2)
|
538
|
-
negative ||= ''
|
539
|
-
self.switching_options = token.token.equal?(:options_switch)
|
540
|
-
|
541
|
-
opt_changes = {}
|
542
|
-
new_active_opts = active_opts.dup
|
543
|
-
|
544
|
-
MOD_FLAGS.each do |flag|
|
545
|
-
if positive.include?(flag.to_s)
|
546
|
-
opt_changes[flag] = new_active_opts[flag] = true
|
547
|
-
end
|
548
|
-
if negative.include?(flag.to_s)
|
549
|
-
opt_changes[flag] = false
|
550
|
-
new_active_opts.delete(flag)
|
551
|
-
end
|
552
|
-
end
|
553
|
-
|
554
|
-
if (enc_flag = positive.reverse[/[adu]/])
|
555
|
-
enc_flag = enc_flag.to_sym
|
556
|
-
(ENC_FLAGS - [enc_flag]).each do |other|
|
557
|
-
opt_changes[other] = false if new_active_opts[other]
|
558
|
-
new_active_opts.delete(other)
|
559
|
-
end
|
560
|
-
opt_changes[enc_flag] = new_active_opts[enc_flag] = true
|
561
|
-
end
|
562
|
-
|
563
|
-
options_stack << new_active_opts
|
564
|
-
|
565
|
-
options_group = Group::Options.new(token, active_opts)
|
566
|
-
options_group.option_changes = opt_changes
|
567
|
-
|
568
|
-
nest(options_group)
|
569
|
-
end
|
570
|
-
|
571
|
-
def open_group(token)
|
558
|
+
def set(token)
|
572
559
|
case token.token
|
573
|
-
when :
|
574
|
-
|
575
|
-
when :
|
576
|
-
|
577
|
-
when :
|
578
|
-
exp = Group::Named.new(token, active_opts)
|
579
|
-
when :capture
|
580
|
-
exp = Group::Capture.new(token, active_opts)
|
581
|
-
when :absence
|
582
|
-
exp = Group::Absence.new(token, active_opts)
|
583
|
-
|
584
|
-
when :lookahead
|
585
|
-
exp = Assertion::Lookahead.new(token, active_opts)
|
586
|
-
when :nlookahead
|
587
|
-
exp = Assertion::NegativeLookahead.new(token, active_opts)
|
588
|
-
when :lookbehind
|
589
|
-
exp = Assertion::Lookbehind.new(token, active_opts)
|
590
|
-
when :nlookbehind
|
591
|
-
exp = Assertion::NegativeLookbehind.new(token, active_opts)
|
592
|
-
|
560
|
+
when :open; open_set(token)
|
561
|
+
when :close; close_set
|
562
|
+
when :negate; negate_set
|
563
|
+
when :range; range(token)
|
564
|
+
when :intersection; intersection(token)
|
593
565
|
else
|
594
|
-
raise UnknownTokenError.new('
|
595
|
-
end
|
596
|
-
|
597
|
-
if exp.capturing?
|
598
|
-
exp.number = total_captured_group_count + 1
|
599
|
-
exp.number_at_level = captured_group_count_at_level + 1
|
600
|
-
count_captured_group
|
566
|
+
raise UnknownTokenError.new('CharacterSet', token)
|
601
567
|
end
|
602
|
-
|
603
|
-
# Push the active options to the stack again. This way we can simply pop the
|
604
|
-
# stack for any group we close, no matter if it had its own options or not.
|
605
|
-
options_stack << active_opts
|
606
|
-
|
607
|
-
nest(exp)
|
608
|
-
end
|
609
|
-
|
610
|
-
def close_group
|
611
|
-
options_stack.pop unless switching_options
|
612
|
-
self.switching_options = false
|
613
|
-
decrease_nesting
|
614
568
|
end
|
615
569
|
|
616
570
|
def open_set(token)
|
@@ -633,51 +587,45 @@ class Regexp::Parser
|
|
633
587
|
nest(exp)
|
634
588
|
end
|
635
589
|
|
636
|
-
def close_completed_character_set_range
|
637
|
-
decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
|
638
|
-
end
|
639
|
-
|
640
590
|
def intersection(token)
|
641
591
|
sequence_operation(CharacterSet::Intersection, token)
|
642
592
|
end
|
643
593
|
|
644
|
-
def
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
594
|
+
def type(token)
|
595
|
+
case token.token
|
596
|
+
when :digit; node << CharacterType::Digit.new(token, active_opts)
|
597
|
+
when :hex; node << CharacterType::Hex.new(token, active_opts)
|
598
|
+
when :linebreak; node << CharacterType::Linebreak.new(token, active_opts)
|
599
|
+
when :nondigit; node << CharacterType::NonDigit.new(token, active_opts)
|
600
|
+
when :nonhex; node << CharacterType::NonHex.new(token, active_opts)
|
601
|
+
when :nonspace; node << CharacterType::NonSpace.new(token, active_opts)
|
602
|
+
when :nonword; node << CharacterType::NonWord.new(token, active_opts)
|
603
|
+
when :space; node << CharacterType::Space.new(token, active_opts)
|
604
|
+
when :word; node << CharacterType::Word.new(token, active_opts)
|
605
|
+
when :xgrapheme; node << CharacterType::ExtendedGrapheme.new(token, active_opts)
|
606
|
+
else
|
607
|
+
raise UnknownTokenError.new('CharacterType', token)
|
651
608
|
end
|
652
|
-
node.add_sequence(active_opts)
|
653
|
-
end
|
654
|
-
|
655
|
-
def active_opts
|
656
|
-
options_stack.last
|
657
|
-
end
|
658
|
-
|
659
|
-
def total_captured_group_count
|
660
|
-
captured_group_counts.values.reduce(0, :+)
|
661
|
-
end
|
662
|
-
|
663
|
-
def captured_group_count_at_level
|
664
|
-
captured_group_counts[node.level]
|
665
609
|
end
|
666
610
|
|
667
|
-
def
|
668
|
-
|
611
|
+
def close_completed_character_set_range
|
612
|
+
decrease_nesting if node.is_a?(CharacterSet::Range) && node.complete?
|
669
613
|
end
|
670
614
|
|
671
|
-
def
|
672
|
-
|
673
|
-
exp.number + total_captured_group_count + (exp.number < 0 ? 1 : 0)
|
615
|
+
def active_opts
|
616
|
+
options_stack.last
|
674
617
|
end
|
675
618
|
|
619
|
+
# Assigns referenced expressions to refering expressions, e.g. if there is
|
620
|
+
# an instance of Backreference::Number, its #referenced_expression is set to
|
621
|
+
# the instance of Group::Capture that it refers to via its number.
|
676
622
|
def assign_referenced_expressions
|
677
623
|
targets = {}
|
624
|
+
# find all referencable expressions
|
678
625
|
root.each_expression do |exp|
|
679
626
|
exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
|
680
627
|
end
|
628
|
+
# assign them to any refering expressions
|
681
629
|
root.each_expression do |exp|
|
682
630
|
exp.respond_to?(:reference) &&
|
683
631
|
exp.referenced_expression = targets[exp.reference]
|