regexp_parser 1.7.1 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +157 -1
- data/Gemfile +6 -1
- data/LICENSE +1 -1
- data/README.md +38 -32
- data/Rakefile +18 -27
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -4
- data/lib/regexp_parser/expression/classes/group.rb +28 -3
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +4 -17
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
- data/lib/regexp_parser/expression/quantifier.rb +11 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -20
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -139
- data/lib/regexp_parser/lexer.rb +13 -11
- data/lib/regexp_parser/parser.rb +325 -344
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/properties/long.csv +604 -0
- data/lib/regexp_parser/scanner/properties/short.csv +242 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +235 -255
- data/lib/regexp_parser/scanner.rb +1324 -1387
- data/lib/regexp_parser/syntax/any.rb +4 -6
- data/lib/regexp_parser/syntax/base.rb +13 -15
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +34 -165
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -52
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
@@ -1,16 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
2
3
|
module EscapeSequence
|
3
4
|
class Base < Regexp::Expression::Base
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
def char
|
7
|
-
# poor man's unescape without using eval
|
8
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
9
|
-
end
|
10
|
-
|
11
5
|
def codepoint
|
12
6
|
char.ord
|
13
7
|
end
|
8
|
+
|
9
|
+
if ''.respond_to?(:undump)
|
10
|
+
def char
|
11
|
+
%("#{text}").undump
|
12
|
+
end
|
13
|
+
else
|
14
|
+
# poor man's unescape without using eval
|
15
|
+
require 'yaml'
|
16
|
+
def char
|
17
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
+
end
|
19
|
+
end
|
14
20
|
end
|
15
21
|
|
16
22
|
class Literal < EscapeSequence::Base
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
|
-
def quantify(
|
5
|
-
raise
|
3
|
+
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
4
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
5
|
end
|
7
6
|
end
|
8
7
|
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
@@ -10,11 +10,36 @@ module Regexp::Expression
|
|
10
10
|
def comment?; false end
|
11
11
|
end
|
12
12
|
|
13
|
-
class
|
14
|
-
|
13
|
+
class Passive < Group::Base
|
14
|
+
attr_writer :implicit
|
15
|
+
|
16
|
+
def initialize(*)
|
17
|
+
@implicit = false
|
18
|
+
super
|
19
|
+
end
|
20
|
+
|
21
|
+
def to_s(format = :full)
|
22
|
+
if implicit?
|
23
|
+
"#{expressions.join}#{quantifier_affix(format)}"
|
24
|
+
else
|
25
|
+
super
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def implicit?
|
30
|
+
@implicit
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
15
34
|
class Absence < Group::Base; end
|
35
|
+
class Atomic < Group::Base; end
|
16
36
|
class Options < Group::Base
|
17
37
|
attr_accessor :option_changes
|
38
|
+
|
39
|
+
def initialize_copy(orig)
|
40
|
+
self.option_changes = orig.option_changes.dup
|
41
|
+
super
|
42
|
+
end
|
18
43
|
end
|
19
44
|
|
20
45
|
class Capture < Group::Base
|
@@ -33,7 +58,7 @@ module Regexp::Expression
|
|
33
58
|
super
|
34
59
|
end
|
35
60
|
|
36
|
-
def
|
61
|
+
def initialize_copy(orig)
|
37
62
|
@name = orig.name.dup
|
38
63
|
super
|
39
64
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -7,7 +6,7 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
|
9
8
|
def name
|
10
|
-
text
|
9
|
+
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
@@ -1,24 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
|
6
|
-
unless args.first.is_a?(Regexp::Token)
|
7
|
-
warn('WARNING: Root.new without a Token argument is deprecated and '\
|
8
|
-
'will be removed in 2.0.0. Use Root.build for the old behavior.')
|
9
|
-
return super(self.class.build_token, *args)
|
10
|
-
end
|
11
|
-
super
|
3
|
+
def self.build(options = {})
|
4
|
+
new(build_token, options)
|
12
5
|
end
|
13
6
|
|
14
|
-
|
15
|
-
|
16
|
-
new(build_token, options)
|
17
|
-
end
|
18
|
-
|
19
|
-
def build_token
|
20
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
21
|
-
end
|
7
|
+
def self.build_token
|
8
|
+
Regexp::Token.new(:expression, :root, '', 0)
|
22
9
|
end
|
23
10
|
end
|
24
11
|
end
|
@@ -10,7 +10,7 @@ class Regexp::MatchLength
|
|
10
10
|
self.exp_class = exp.class
|
11
11
|
self.min_rep = exp.repetitions.min
|
12
12
|
self.max_rep = exp.repetitions.max
|
13
|
-
if base = opts[:base]
|
13
|
+
if (base = opts[:base])
|
14
14
|
self.base_min = base
|
15
15
|
self.base_max = base
|
16
16
|
self.reify = ->{ '.' * base }
|
@@ -32,7 +32,7 @@ class Regexp::MatchLength
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
def endless_each
|
35
|
+
def endless_each
|
36
36
|
return enum_for(__method__) unless block_given?
|
37
37
|
(min..max).each { |num| yield(num) if include?(num) }
|
38
38
|
end
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
@@ -36,7 +36,7 @@ module Regexp::Expression
|
|
36
36
|
|
37
37
|
# Iterates over the expressions of this expression as an array, passing
|
38
38
|
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false
|
39
|
+
def each_expression(include_self = false)
|
40
40
|
return enum_for(__method__, include_self) unless block_given?
|
41
41
|
|
42
42
|
traverse(include_self) do |event, exp, index|
|
@@ -47,7 +47,7 @@ module Regexp::Expression
|
|
47
47
|
# Returns a new array with the results of calling the given block once
|
48
48
|
# for every expression. If a block is not given, returns an array with
|
49
49
|
# each expression and its level index as an array.
|
50
|
-
def flat_map(include_self = false
|
50
|
+
def flat_map(include_self = false)
|
51
51
|
result = []
|
52
52
|
|
53
53
|
each_expression(include_self) do |exp, index|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Quantifier
|
3
|
-
MODES = [
|
3
|
+
MODES = %i[greedy possessive reluctant]
|
4
4
|
|
5
5
|
attr_reader :token, :text, :min, :max, :mode
|
6
6
|
|
@@ -12,7 +12,7 @@ module Regexp::Expression
|
|
12
12
|
@max = max
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
15
|
+
def initialize_copy(orig)
|
16
16
|
@text = orig.text.dup
|
17
17
|
super
|
18
18
|
end
|
@@ -40,5 +40,14 @@ module Regexp::Expression
|
|
40
40
|
RUBY
|
41
41
|
end
|
42
42
|
alias :lazy? :reluctant?
|
43
|
+
|
44
|
+
def ==(other)
|
45
|
+
other.class == self.class &&
|
46
|
+
other.token == token &&
|
47
|
+
other.mode == mode &&
|
48
|
+
other.min == min &&
|
49
|
+
other.max == max
|
50
|
+
end
|
51
|
+
alias :eq :==
|
43
52
|
end
|
44
53
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpression by how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -7,16 +6,6 @@ module Regexp::Expression
|
|
7
6
|
# Used as the base class for the Alternation alternatives, Conditional
|
8
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
9
8
|
class Sequence < Regexp::Expression::Subexpression
|
10
|
-
# TODO: this override is here for backwards compatibility, remove in 2.0.0
|
11
|
-
def initialize(*args)
|
12
|
-
if args.count == 3
|
13
|
-
warn('WARNING: Sequence.new without a Regexp::Token argument is '\
|
14
|
-
'deprecated and will be removed in 2.0.0.')
|
15
|
-
return self.class.at_levels(*args)
|
16
|
-
end
|
17
|
-
super
|
18
|
-
end
|
19
|
-
|
20
9
|
class << self
|
21
10
|
def add_to(subexpression, params = {}, active_opts = {})
|
22
11
|
sequence = at_levels(
|
@@ -51,17 +40,11 @@ module Regexp::Expression
|
|
51
40
|
alias :ts :starts_at
|
52
41
|
|
53
42
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
54
|
-
|
55
|
-
target
|
56
|
-
|
57
|
-
target = expressions[offset -= 1]
|
58
|
-
end
|
59
|
-
|
60
|
-
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
61
|
-
'quantifier')
|
43
|
+
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
|
+
target or raise Regexp::Parser::Error,
|
45
|
+
"No valid target found for '#{text}' quantifier"
|
62
46
|
|
63
47
|
target.quantify(token, text, min, max, mode)
|
64
48
|
end
|
65
49
|
end
|
66
|
-
|
67
50
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
@@ -12,7 +11,7 @@ module Regexp::Expression
|
|
12
11
|
end
|
13
12
|
|
14
13
|
# Override base method to clone the expressions as well.
|
15
|
-
def
|
14
|
+
def initialize_copy(orig)
|
16
15
|
self.expressions = orig.expressions.map(&:clone)
|
17
16
|
super
|
18
17
|
end
|
@@ -1,138 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class Base
|
4
|
-
attr_accessor :type, :token
|
5
|
-
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
|
-
|
8
|
-
attr_accessor :quantifier
|
9
|
-
attr_accessor :options
|
10
|
-
|
11
|
-
def initialize(token, options = {})
|
12
|
-
self.type = token.type
|
13
|
-
self.token = token.token
|
14
|
-
self.text = token.text
|
15
|
-
self.ts = token.ts
|
16
|
-
self.level = token.level
|
17
|
-
self.set_level = token.set_level
|
18
|
-
self.conditional_level = token.conditional_level
|
19
|
-
self.nesting_level = 0
|
20
|
-
self.quantifier = nil
|
21
|
-
self.options = options
|
22
|
-
end
|
23
|
-
|
24
|
-
def initialize_clone(orig)
|
25
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
|
-
super
|
29
|
-
end
|
30
|
-
|
31
|
-
def to_re(format = :full)
|
32
|
-
::Regexp.new(to_s(format))
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :starts_at :ts
|
36
|
-
|
37
|
-
def full_length
|
38
|
-
to_s.length
|
39
|
-
end
|
40
|
-
|
41
|
-
def offset
|
42
|
-
[starts_at, full_length]
|
43
|
-
end
|
44
|
-
|
45
|
-
def coded_offset
|
46
|
-
'@%d+%d' % offset
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s(format = :full)
|
50
|
-
"#{text}#{quantifier_affix(format)}"
|
51
|
-
end
|
52
|
-
|
53
|
-
def quantifier_affix(expression_format)
|
54
|
-
quantifier.to_s if quantified? && expression_format != :base
|
55
|
-
end
|
56
|
-
|
57
|
-
def terminal?
|
58
|
-
!respond_to?(:expressions)
|
59
|
-
end
|
60
|
-
|
61
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
62
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
63
|
-
end
|
64
|
-
|
65
|
-
def unquantified_clone
|
66
|
-
clone.tap { |exp| exp.quantifier = nil }
|
67
|
-
end
|
68
|
-
|
69
|
-
def quantified?
|
70
|
-
!quantifier.nil?
|
71
|
-
end
|
72
|
-
|
73
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
74
|
-
def quantity
|
75
|
-
return [nil,nil] unless quantified?
|
76
|
-
[quantifier.min, quantifier.max]
|
77
|
-
end
|
78
|
-
|
79
|
-
def repetitions
|
80
|
-
return 1..1 unless quantified?
|
81
|
-
min = quantifier.min
|
82
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
83
|
-
# fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
|
84
|
-
(min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
|
85
|
-
end
|
86
|
-
|
87
|
-
def greedy?
|
88
|
-
quantified? and quantifier.greedy?
|
89
|
-
end
|
90
|
-
|
91
|
-
def reluctant?
|
92
|
-
quantified? and quantifier.reluctant?
|
93
|
-
end
|
94
|
-
alias :lazy? :reluctant?
|
95
|
-
|
96
|
-
def possessive?
|
97
|
-
quantified? and quantifier.possessive?
|
98
|
-
end
|
99
|
-
|
100
|
-
def attributes
|
101
|
-
{
|
102
|
-
type: type,
|
103
|
-
token: token,
|
104
|
-
text: to_s(:base),
|
105
|
-
starts_at: ts,
|
106
|
-
length: full_length,
|
107
|
-
level: level,
|
108
|
-
set_level: set_level,
|
109
|
-
conditional_level: conditional_level,
|
110
|
-
options: options,
|
111
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
112
|
-
}
|
113
|
-
end
|
114
|
-
alias :to_h :attributes
|
115
|
-
end
|
116
|
-
|
117
|
-
def self.parsed(exp)
|
118
|
-
warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
|
119
|
-
'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
|
120
|
-
case exp
|
121
|
-
when String
|
122
|
-
Regexp::Parser.parse(exp)
|
123
|
-
when Regexp
|
124
|
-
Regexp::Parser.parse(exp.source) # <- causes loss of root options
|
125
|
-
when Regexp::Expression # <- never triggers
|
126
|
-
exp
|
127
|
-
else
|
128
|
-
raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
|
129
|
-
'a Regexp::Expression as a value for exp, but it '\
|
130
|
-
"was given #{exp.class.name}."
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
end # module Regexp::Expression
|
1
|
+
require 'regexp_parser/error'
|
135
2
|
|
3
|
+
require 'regexp_parser/expression/base'
|
136
4
|
require 'regexp_parser/expression/quantifier'
|
137
5
|
require 'regexp_parser/expression/subexpression'
|
138
6
|
require 'regexp_parser/expression/sequence'
|
@@ -140,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
|
|
140
8
|
|
141
9
|
require 'regexp_parser/expression/classes/alternation'
|
142
10
|
require 'regexp_parser/expression/classes/anchor'
|
143
|
-
require 'regexp_parser/expression/classes/
|
11
|
+
require 'regexp_parser/expression/classes/backreference'
|
12
|
+
require 'regexp_parser/expression/classes/character_set'
|
13
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
144
15
|
require 'regexp_parser/expression/classes/conditional'
|
145
|
-
require 'regexp_parser/expression/classes/
|
16
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
146
17
|
require 'regexp_parser/expression/classes/free_space'
|
147
18
|
require 'regexp_parser/expression/classes/group'
|
148
19
|
require 'regexp_parser/expression/classes/keep'
|
@@ -150,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
|
|
150
21
|
require 'regexp_parser/expression/classes/posix_class'
|
151
22
|
require 'regexp_parser/expression/classes/property'
|
152
23
|
require 'regexp_parser/expression/classes/root'
|
153
|
-
require 'regexp_parser/expression/classes/set'
|
154
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
155
|
-
require 'regexp_parser/expression/classes/set/range'
|
156
24
|
require 'regexp_parser/expression/classes/type'
|
157
25
|
|
158
26
|
require 'regexp_parser/expression/methods/match'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,18 +4,20 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
13
|
|
14
|
-
|
15
|
-
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
15
|
+
|
16
|
+
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
17
|
+
new.lex(input, syntax, options: options, &block)
|
16
18
|
end
|
17
19
|
|
18
|
-
def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
|
20
|
+
def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
19
21
|
syntax = Regexp::Syntax.new(syntax)
|
20
22
|
|
21
23
|
self.tokens = []
|
@@ -25,7 +27,7 @@ class Regexp::Lexer
|
|
25
27
|
self.shift = 0
|
26
28
|
|
27
29
|
last = nil
|
28
|
-
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
|
30
|
+
Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
|
29
31
|
type, token = *syntax.normalize(type, token)
|
30
32
|
syntax.check! type, token
|
31
33
|
|
@@ -40,7 +42,7 @@ class Regexp::Lexer
|
|
40
42
|
nesting, set_nesting, conditional_nesting)
|
41
43
|
|
42
44
|
current = merge_condition(current) if type == :conditional and
|
43
|
-
|
45
|
+
CONDITION_TOKENS.include?(token)
|
44
46
|
|
45
47
|
last.next = current if last
|
46
48
|
current.previous = last if last
|
@@ -96,10 +98,10 @@ class Regexp::Lexer
|
|
96
98
|
|
97
99
|
tokens.pop
|
98
100
|
tokens << Regexp::Token.new(:literal, :literal, lead,
|
99
|
-
token.ts, (token.te - last.
|
101
|
+
token.ts, (token.te - last.length),
|
100
102
|
nesting, set_nesting, conditional_nesting)
|
101
103
|
tokens << Regexp::Token.new(:literal, :literal, last,
|
102
|
-
(token.ts + lead.
|
104
|
+
(token.ts + lead.length), token.te,
|
103
105
|
nesting, set_nesting, conditional_nesting)
|
104
106
|
end
|
105
107
|
|