regexp_parser 1.7.0 → 2.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +364 -22
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/README.md +124 -88
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +295 -368
- data/lib/regexp_parser/scanner.rb +1405 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +49 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,26 +1,20 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
3
|
-
|
7
|
+
include Regexp::Expression::Shared
|
4
8
|
|
5
|
-
|
9
|
+
MODES = %i[greedy possessive reluctant]
|
6
10
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_clone(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
11
|
+
def initialize(*args)
|
12
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
13
|
|
20
|
-
|
21
|
-
|
14
|
+
init_from_token_and_options(*args)
|
15
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
16
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
17
|
end
|
23
|
-
alias :to_str :to_s
|
24
18
|
|
25
19
|
def to_h
|
26
20
|
{
|
@@ -40,5 +34,51 @@ module Regexp::Expression
|
|
40
34
|
RUBY
|
41
35
|
end
|
42
36
|
alias :lazy? :reluctant?
|
37
|
+
|
38
|
+
def min
|
39
|
+
derived_data[:min]
|
40
|
+
end
|
41
|
+
|
42
|
+
def max
|
43
|
+
derived_data[:max]
|
44
|
+
end
|
45
|
+
|
46
|
+
def mode
|
47
|
+
derived_data[:mode]
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
|
53
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
54
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
55
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
56
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
57
|
+
"will be derived automatically.\n"\
|
58
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
59
|
+
"This is consistent with how Expression::Base instances are created. "
|
60
|
+
@token = token
|
61
|
+
@text = text
|
62
|
+
end
|
63
|
+
|
64
|
+
def derived_data
|
65
|
+
@derived_data ||= begin
|
66
|
+
min, max =
|
67
|
+
case text[0]
|
68
|
+
when '?'; [0, 1]
|
69
|
+
when '*'; [0, -1]
|
70
|
+
when '+'; [1, -1]
|
71
|
+
else
|
72
|
+
int_min = text[/\{(\d*)/, 1]
|
73
|
+
int_max = text[/,?(\d*)\}/, 1]
|
74
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
75
|
+
end
|
76
|
+
|
77
|
+
mod = text[/.([?+])/, 1]
|
78
|
+
mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
|
79
|
+
|
80
|
+
{ min: min, max: max, mode: mode }
|
81
|
+
end
|
82
|
+
end
|
43
83
|
end
|
44
84
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpression in how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -7,61 +6,26 @@ module Regexp::Expression
|
|
7
6
|
# Used as the base class for the Alternation alternatives, Conditional
|
8
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
9
8
|
class Sequence < Regexp::Expression::Subexpression
|
10
|
-
# TODO: this override is here for backwards compatibility, remove in 2.0.0
|
11
|
-
def initialize(*args)
|
12
|
-
if args.count == 3
|
13
|
-
warn('WARNING: Sequence.new without a Regexp::Token argument is '\
|
14
|
-
'deprecated and will be removed in 2.0.0.')
|
15
|
-
return self.class.at_levels(*args)
|
16
|
-
end
|
17
|
-
super
|
18
|
-
end
|
19
|
-
|
20
9
|
class << self
|
21
|
-
def add_to(
|
22
|
-
sequence =
|
23
|
-
|
24
|
-
|
25
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
|
+
ts: params[:ts],
|
26
16
|
)
|
27
|
-
sequence.nesting_level = subexpression.nesting_level + 1
|
28
17
|
sequence.options = active_opts
|
29
|
-
|
18
|
+
exp.expressions << sequence
|
30
19
|
sequence
|
31
20
|
end
|
32
|
-
|
33
|
-
def at_levels(level, set_level, conditional_level)
|
34
|
-
token = Regexp::Token.new(
|
35
|
-
:expression,
|
36
|
-
:sequence,
|
37
|
-
'',
|
38
|
-
nil, # ts
|
39
|
-
nil, # te
|
40
|
-
level,
|
41
|
-
set_level,
|
42
|
-
conditional_level
|
43
|
-
)
|
44
|
-
new(token)
|
45
|
-
end
|
46
21
|
end
|
47
22
|
|
48
|
-
def
|
49
|
-
expressions.first.
|
23
|
+
def ts
|
24
|
+
(head = expressions.first) ? head.ts : @ts
|
50
25
|
end
|
51
|
-
alias :ts :starts_at
|
52
|
-
|
53
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
54
|
-
offset = -1
|
55
|
-
target = expressions[offset]
|
56
|
-
while target.is_a?(FreeSpace)
|
57
|
-
target = expressions[offset -= 1]
|
58
|
-
end
|
59
26
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
target.quantify(token, text, min, max, mode)
|
27
|
+
def quantify(token, *args)
|
28
|
+
extract_quantifier_target(token.text).quantify(token, *args)
|
64
29
|
end
|
65
30
|
end
|
66
|
-
|
67
31
|
end
|
@@ -5,21 +5,16 @@ module Regexp::Expression
|
|
5
5
|
alias :operands :expressions
|
6
6
|
alias :operator :text
|
7
7
|
|
8
|
-
def
|
9
|
-
expressions.first.
|
8
|
+
def ts
|
9
|
+
(head = expressions.first) ? head.ts : @ts
|
10
10
|
end
|
11
|
-
alias :ts :starts_at
|
12
11
|
|
13
12
|
def <<(exp)
|
14
13
|
expressions.last << exp
|
15
14
|
end
|
16
15
|
|
17
|
-
def add_sequence(active_opts = {})
|
18
|
-
self.class::OPERAND.add_to(self,
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_s(format = :full)
|
22
|
-
sequences.map { |e| e.to_s(format) }.join(text)
|
16
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
17
|
+
self.class::OPERAND.add_to(self, params, active_opts)
|
23
18
|
end
|
24
19
|
end
|
25
20
|
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
9
|
+
attr_accessor :type, :token, :text, :ts, :te,
|
10
|
+
:level, :set_level, :conditional_level,
|
11
|
+
:options, :parent,
|
12
|
+
:custom_to_s_handling, :pre_quantifier_decorations
|
13
|
+
|
14
|
+
attr_reader :nesting_level, :quantifier
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def init_from_token_and_options(token, options = {})
|
19
|
+
self.type = token.type
|
20
|
+
self.token = token.token
|
21
|
+
self.text = token.text
|
22
|
+
self.ts = token.ts
|
23
|
+
self.te = token.te
|
24
|
+
self.level = token.level
|
25
|
+
self.set_level = token.set_level
|
26
|
+
self.conditional_level = token.conditional_level
|
27
|
+
self.nesting_level = 0
|
28
|
+
self.options = options || {}
|
29
|
+
end
|
30
|
+
private :init_from_token_and_options
|
31
|
+
|
32
|
+
def initialize_copy(orig)
|
33
|
+
self.text = orig.text.dup if orig.text
|
34
|
+
self.options = orig.options.dup if orig.options
|
35
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
36
|
+
self.parent = nil # updated by Subexpression#initialize_copy
|
37
|
+
if orig.pre_quantifier_decorations
|
38
|
+
self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
|
39
|
+
end
|
40
|
+
super
|
41
|
+
end
|
42
|
+
|
43
|
+
def starts_at
|
44
|
+
ts
|
45
|
+
end
|
46
|
+
|
47
|
+
def ends_at(include_quantifier = true)
|
48
|
+
ts + (include_quantifier ? full_length : base_length)
|
49
|
+
end
|
50
|
+
|
51
|
+
def base_length
|
52
|
+
to_s(:base).length
|
53
|
+
end
|
54
|
+
|
55
|
+
def full_length
|
56
|
+
to_s(:original).length
|
57
|
+
end
|
58
|
+
|
59
|
+
# #to_s reproduces the original source, as an unparser would.
|
60
|
+
#
|
61
|
+
# It takes an optional format argument.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
#
|
65
|
+
# lit = Regexp::Parser.parse(/a +/x)[0]
|
66
|
+
#
|
67
|
+
# lit.to_s # => 'a+' # default; with quantifier
|
68
|
+
# lit.to_s(:full) # => 'a+' # default; with quantifier
|
69
|
+
# lit.to_s(:base) # => 'a' # without quantifier
|
70
|
+
# lit.to_s(:original) # => 'a +' # with quantifier AND intervening decorations
|
71
|
+
#
|
72
|
+
def to_s(format = :full)
|
73
|
+
base = parts.each_with_object(''.dup) do |part, buff|
|
74
|
+
if part.instance_of?(String)
|
75
|
+
buff << part
|
76
|
+
elsif !part.custom_to_s_handling
|
77
|
+
buff << part.to_s(:original)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
81
|
+
end
|
82
|
+
alias :to_str :to_s
|
83
|
+
|
84
|
+
def pre_quantifier_decoration(expression_format = :original)
|
85
|
+
pre_quantifier_decorations.to_a.join if expression_format == :original
|
86
|
+
end
|
87
|
+
|
88
|
+
def quantifier_affix(expression_format = :full)
|
89
|
+
quantifier.to_s if quantified? && expression_format != :base
|
90
|
+
end
|
91
|
+
|
92
|
+
def offset
|
93
|
+
[starts_at, full_length]
|
94
|
+
end
|
95
|
+
|
96
|
+
def coded_offset
|
97
|
+
'@%d+%d' % offset
|
98
|
+
end
|
99
|
+
|
100
|
+
def nesting_level=(lvl)
|
101
|
+
@nesting_level = lvl
|
102
|
+
quantifier && quantifier.nesting_level = lvl
|
103
|
+
terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
|
104
|
+
end
|
105
|
+
|
106
|
+
def quantifier=(qtf)
|
107
|
+
@quantifier = qtf
|
108
|
+
@repetitions = nil # clear memoized value
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -1,29 +1,25 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
6
5
|
attr_accessor :expressions
|
7
6
|
|
8
7
|
def initialize(token, options = {})
|
9
|
-
super
|
10
|
-
|
11
8
|
self.expressions = []
|
9
|
+
super
|
12
10
|
end
|
13
11
|
|
14
12
|
# Override base method to clone the expressions as well.
|
15
|
-
def
|
16
|
-
self.expressions = orig.expressions.map
|
13
|
+
def initialize_copy(orig)
|
14
|
+
self.expressions = orig.expressions.map do |exp|
|
15
|
+
exp.clone.tap { |copy| copy.parent = self }
|
16
|
+
end
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
20
20
|
def <<(exp)
|
21
|
-
|
22
|
-
|
23
|
-
else
|
24
|
-
exp.nesting_level = nesting_level + 1
|
25
|
-
expressions << exp
|
26
|
-
end
|
21
|
+
exp.parent = self
|
22
|
+
expressions << exp
|
27
23
|
end
|
28
24
|
|
29
25
|
%w[[] at each empty? fetch index join last length values_at].each do |method|
|
@@ -41,19 +37,31 @@ module Regexp::Expression
|
|
41
37
|
end
|
42
38
|
|
43
39
|
def te
|
44
|
-
ts +
|
45
|
-
end
|
46
|
-
|
47
|
-
def to_s(format = :full)
|
48
|
-
# Note: the format does not get passed down to subexpressions.
|
49
|
-
"#{expressions.join}#{quantifier_affix(format)}"
|
40
|
+
ts + base_length
|
50
41
|
end
|
51
42
|
|
52
43
|
def to_h
|
53
|
-
attributes.merge(
|
44
|
+
attributes.merge(
|
54
45
|
text: to_s(:base),
|
55
46
|
expressions: expressions.map(&:to_h)
|
56
|
-
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
def extract_quantifier_target(quantifier_description)
|
51
|
+
pre_quantifier_decorations = []
|
52
|
+
target = expressions.reverse.find do |exp|
|
53
|
+
if exp.decorative?
|
54
|
+
exp.custom_to_s_handling = true
|
55
|
+
pre_quantifier_decorations << exp.text
|
56
|
+
next
|
57
|
+
end
|
58
|
+
exp
|
59
|
+
end
|
60
|
+
target or raise Regexp::Parser::ParserError,
|
61
|
+
"No valid target found for '#{quantifier_description}' quantifier"
|
62
|
+
|
63
|
+
target.pre_quantifier_decorations = pre_quantifier_decorations
|
64
|
+
target
|
57
65
|
end
|
58
66
|
end
|
59
67
|
end
|
@@ -1,138 +1,7 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class Base
|
4
|
-
attr_accessor :type, :token
|
5
|
-
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
|
-
|
8
|
-
attr_accessor :quantifier
|
9
|
-
attr_accessor :options
|
10
|
-
|
11
|
-
def initialize(token, options = {})
|
12
|
-
self.type = token.type
|
13
|
-
self.token = token.token
|
14
|
-
self.text = token.text
|
15
|
-
self.ts = token.ts
|
16
|
-
self.level = token.level
|
17
|
-
self.set_level = token.set_level
|
18
|
-
self.conditional_level = token.conditional_level
|
19
|
-
self.nesting_level = 0
|
20
|
-
self.quantifier = nil
|
21
|
-
self.options = options
|
22
|
-
end
|
23
|
-
|
24
|
-
def initialize_clone(orig)
|
25
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
|
-
super
|
29
|
-
end
|
30
|
-
|
31
|
-
def to_re(format = :full)
|
32
|
-
::Regexp.new(to_s(format))
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :starts_at :ts
|
36
|
-
|
37
|
-
def full_length
|
38
|
-
to_s.length
|
39
|
-
end
|
40
|
-
|
41
|
-
def offset
|
42
|
-
[starts_at, full_length]
|
43
|
-
end
|
44
|
-
|
45
|
-
def coded_offset
|
46
|
-
'@%d+%d' % offset
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s(format = :full)
|
50
|
-
"#{text}#{quantifier_affix(format)}"
|
51
|
-
end
|
52
|
-
|
53
|
-
def quantifier_affix(expression_format)
|
54
|
-
quantifier.to_s if quantified? && expression_format != :base
|
55
|
-
end
|
56
|
-
|
57
|
-
def terminal?
|
58
|
-
!respond_to?(:expressions)
|
59
|
-
end
|
60
|
-
|
61
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
62
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
63
|
-
end
|
64
|
-
|
65
|
-
def unquantified_clone
|
66
|
-
clone.tap { |exp| exp.quantifier = nil }
|
67
|
-
end
|
68
|
-
|
69
|
-
def quantified?
|
70
|
-
!quantifier.nil?
|
71
|
-
end
|
72
|
-
|
73
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
74
|
-
def quantity
|
75
|
-
return [nil,nil] unless quantified?
|
76
|
-
[quantifier.min, quantifier.max]
|
77
|
-
end
|
78
|
-
|
79
|
-
def repetitions
|
80
|
-
return 1..1 unless quantified?
|
81
|
-
min = quantifier.min
|
82
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
83
|
-
# fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
|
84
|
-
(min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
|
85
|
-
end
|
86
|
-
|
87
|
-
def greedy?
|
88
|
-
quantified? and quantifier.greedy?
|
89
|
-
end
|
90
|
-
|
91
|
-
def reluctant?
|
92
|
-
quantified? and quantifier.reluctant?
|
93
|
-
end
|
94
|
-
alias :lazy? :reluctant?
|
95
|
-
|
96
|
-
def possessive?
|
97
|
-
quantified? and quantifier.possessive?
|
98
|
-
end
|
99
|
-
|
100
|
-
def attributes
|
101
|
-
{
|
102
|
-
type: type,
|
103
|
-
token: token,
|
104
|
-
text: to_s(:base),
|
105
|
-
starts_at: ts,
|
106
|
-
length: full_length,
|
107
|
-
level: level,
|
108
|
-
set_level: set_level,
|
109
|
-
conditional_level: conditional_level,
|
110
|
-
options: options,
|
111
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
112
|
-
}
|
113
|
-
end
|
114
|
-
alias :to_h :attributes
|
115
|
-
end
|
116
|
-
|
117
|
-
def self.parsed(exp)
|
118
|
-
warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
|
119
|
-
'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
|
120
|
-
case exp
|
121
|
-
when String
|
122
|
-
Regexp::Parser.parse(exp)
|
123
|
-
when Regexp
|
124
|
-
Regexp::Parser.parse(exp.source) # <- causes loss of root options
|
125
|
-
when Regexp::Expression # <- never triggers
|
126
|
-
exp
|
127
|
-
else
|
128
|
-
raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
|
129
|
-
'a Regexp::Expression as a value for exp, but it '\
|
130
|
-
"was given #{exp.class.name}."
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
end # module Regexp::Expression
|
1
|
+
require 'regexp_parser/error'
|
135
2
|
|
3
|
+
require 'regexp_parser/expression/shared'
|
4
|
+
require 'regexp_parser/expression/base'
|
136
5
|
require 'regexp_parser/expression/quantifier'
|
137
6
|
require 'regexp_parser/expression/subexpression'
|
138
7
|
require 'regexp_parser/expression/sequence'
|
@@ -140,24 +9,28 @@ require 'regexp_parser/expression/sequence_operation'
|
|
140
9
|
|
141
10
|
require 'regexp_parser/expression/classes/alternation'
|
142
11
|
require 'regexp_parser/expression/classes/anchor'
|
143
|
-
require 'regexp_parser/expression/classes/
|
12
|
+
require 'regexp_parser/expression/classes/backreference'
|
13
|
+
require 'regexp_parser/expression/classes/character_set'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
144
17
|
require 'regexp_parser/expression/classes/conditional'
|
145
|
-
require 'regexp_parser/expression/classes/
|
18
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
146
19
|
require 'regexp_parser/expression/classes/free_space'
|
147
20
|
require 'regexp_parser/expression/classes/group'
|
148
21
|
require 'regexp_parser/expression/classes/keep'
|
149
22
|
require 'regexp_parser/expression/classes/literal'
|
150
23
|
require 'regexp_parser/expression/classes/posix_class'
|
151
|
-
require 'regexp_parser/expression/classes/property'
|
152
24
|
require 'regexp_parser/expression/classes/root'
|
153
|
-
require 'regexp_parser/expression/classes/
|
154
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
155
|
-
require 'regexp_parser/expression/classes/set/range'
|
156
|
-
require 'regexp_parser/expression/classes/type'
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
157
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
28
|
+
require 'regexp_parser/expression/methods/human_name'
|
158
29
|
require 'regexp_parser/expression/methods/match'
|
159
30
|
require 'regexp_parser/expression/methods/match_length'
|
160
31
|
require 'regexp_parser/expression/methods/options'
|
32
|
+
require 'regexp_parser/expression/methods/parts'
|
33
|
+
require 'regexp_parser/expression/methods/printing'
|
161
34
|
require 'regexp_parser/expression/methods/strfregexp'
|
162
35
|
require 'regexp_parser/expression/methods/tests'
|
163
36
|
require 'regexp_parser/expression/methods/traverse'
|