regexp_parser 1.7.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +364 -22
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/README.md +124 -88
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +295 -368
- data/lib/regexp_parser/scanner.rb +1405 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +49 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,26 +1,20 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
3
|
+
# call super in #initialize, but raise in #quantifier= and #quantify,
|
4
|
+
# or introduce an Expression::Quantifiable intermediate class.
|
5
|
+
# Or actually allow chaining as a more concise but tricky solution than PR#69.
|
2
6
|
class Quantifier
|
3
|
-
|
7
|
+
include Regexp::Expression::Shared
|
4
8
|
|
5
|
-
|
9
|
+
MODES = %i[greedy possessive reluctant]
|
6
10
|
|
7
|
-
def initialize(
|
8
|
-
|
9
|
-
@text = text
|
10
|
-
@mode = mode
|
11
|
-
@min = min
|
12
|
-
@max = max
|
13
|
-
end
|
14
|
-
|
15
|
-
def initialize_clone(orig)
|
16
|
-
@text = orig.text.dup
|
17
|
-
super
|
18
|
-
end
|
11
|
+
def initialize(*args)
|
12
|
+
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|
19
13
|
|
20
|
-
|
21
|
-
|
14
|
+
init_from_token_and_options(*args)
|
15
|
+
# TODO: remove in v3.0.0, stop removing parts of #token (?)
|
16
|
+
self.token = token.to_s.sub(/_(greedy|possessive|reluctant)/, '').to_sym
|
22
17
|
end
|
23
|
-
alias :to_str :to_s
|
24
18
|
|
25
19
|
def to_h
|
26
20
|
{
|
@@ -40,5 +34,51 @@ module Regexp::Expression
|
|
40
34
|
RUBY
|
41
35
|
end
|
42
36
|
alias :lazy? :reluctant?
|
37
|
+
|
38
|
+
def min
|
39
|
+
derived_data[:min]
|
40
|
+
end
|
41
|
+
|
42
|
+
def max
|
43
|
+
derived_data[:max]
|
44
|
+
end
|
45
|
+
|
46
|
+
def mode
|
47
|
+
derived_data[:mode]
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
def deprecated_old_init(token, text, _min, _max, _mode = :greedy)
|
53
|
+
warn "Calling `Expression::Base#quantify` or `#{self.class}.new` with 4+ arguments "\
|
54
|
+
"is deprecated.\nIt will no longer be supported in regexp_parser v3.0.0.\n"\
|
55
|
+
"Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode` "\
|
56
|
+
"with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode "\
|
57
|
+
"will be derived automatically.\n"\
|
58
|
+
"Or do `exp.quantifier = #{self.class}.construct(token: token, text: str)`.\n"\
|
59
|
+
"This is consistent with how Expression::Base instances are created. "
|
60
|
+
@token = token
|
61
|
+
@text = text
|
62
|
+
end
|
63
|
+
|
64
|
+
def derived_data
|
65
|
+
@derived_data ||= begin
|
66
|
+
min, max =
|
67
|
+
case text[0]
|
68
|
+
when '?'; [0, 1]
|
69
|
+
when '*'; [0, -1]
|
70
|
+
when '+'; [1, -1]
|
71
|
+
else
|
72
|
+
int_min = text[/\{(\d*)/, 1]
|
73
|
+
int_max = text[/,?(\d*)\}/, 1]
|
74
|
+
[int_min.to_i, (int_max.empty? ? -1 : int_max.to_i)]
|
75
|
+
end
|
76
|
+
|
77
|
+
mod = text[/.([?+])/, 1]
|
78
|
+
mode = (mod == '?' && :reluctant) || (mod == '+' && :possessive) || :greedy
|
79
|
+
|
80
|
+
{ min: min, max: max, mode: mode }
|
81
|
+
end
|
82
|
+
end
|
43
83
|
end
|
44
84
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpression by how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -7,61 +6,26 @@ module Regexp::Expression
|
|
7
6
|
# Used as the base class for the Alternation alternatives, Conditional
|
8
7
|
# branches, and CharacterSet::Intersection intersected sequences.
|
9
8
|
class Sequence < Regexp::Expression::Subexpression
|
10
|
-
# TODO: this override is here for backwards compatibility, remove in 2.0.0
|
11
|
-
def initialize(*args)
|
12
|
-
if args.count == 3
|
13
|
-
warn('WARNING: Sequence.new without a Regexp::Token argument is '\
|
14
|
-
'deprecated and will be removed in 2.0.0.')
|
15
|
-
return self.class.at_levels(*args)
|
16
|
-
end
|
17
|
-
super
|
18
|
-
end
|
19
|
-
|
20
9
|
class << self
|
21
|
-
def add_to(
|
22
|
-
sequence =
|
23
|
-
|
24
|
-
|
25
|
-
params[:conditional_level] ||
|
10
|
+
def add_to(exp, params = {}, active_opts = {})
|
11
|
+
sequence = construct(
|
12
|
+
level: exp.level,
|
13
|
+
set_level: exp.set_level,
|
14
|
+
conditional_level: params[:conditional_level] || exp.conditional_level,
|
15
|
+
ts: params[:ts],
|
26
16
|
)
|
27
|
-
sequence.nesting_level = subexpression.nesting_level + 1
|
28
17
|
sequence.options = active_opts
|
29
|
-
|
18
|
+
exp.expressions << sequence
|
30
19
|
sequence
|
31
20
|
end
|
32
|
-
|
33
|
-
def at_levels(level, set_level, conditional_level)
|
34
|
-
token = Regexp::Token.new(
|
35
|
-
:expression,
|
36
|
-
:sequence,
|
37
|
-
'',
|
38
|
-
nil, # ts
|
39
|
-
nil, # te
|
40
|
-
level,
|
41
|
-
set_level,
|
42
|
-
conditional_level
|
43
|
-
)
|
44
|
-
new(token)
|
45
|
-
end
|
46
21
|
end
|
47
22
|
|
48
|
-
def
|
49
|
-
expressions.first.
|
23
|
+
def ts
|
24
|
+
(head = expressions.first) ? head.ts : @ts
|
50
25
|
end
|
51
|
-
alias :ts :starts_at
|
52
|
-
|
53
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
54
|
-
offset = -1
|
55
|
-
target = expressions[offset]
|
56
|
-
while target.is_a?(FreeSpace)
|
57
|
-
target = expressions[offset -= 1]
|
58
|
-
end
|
59
26
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
target.quantify(token, text, min, max, mode)
|
27
|
+
def quantify(token, *args)
|
28
|
+
extract_quantifier_target(token.text).quantify(token, *args)
|
64
29
|
end
|
65
30
|
end
|
66
|
-
|
67
31
|
end
|
@@ -5,21 +5,16 @@ module Regexp::Expression
|
|
5
5
|
alias :operands :expressions
|
6
6
|
alias :operator :text
|
7
7
|
|
8
|
-
def
|
9
|
-
expressions.first.
|
8
|
+
def ts
|
9
|
+
(head = expressions.first) ? head.ts : @ts
|
10
10
|
end
|
11
|
-
alias :ts :starts_at
|
12
11
|
|
13
12
|
def <<(exp)
|
14
13
|
expressions.last << exp
|
15
14
|
end
|
16
15
|
|
17
|
-
def add_sequence(active_opts = {})
|
18
|
-
self.class::OPERAND.add_to(self,
|
19
|
-
end
|
20
|
-
|
21
|
-
def to_s(format = :full)
|
22
|
-
sequences.map { |e| e.to_s(format) }.join(text)
|
16
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
17
|
+
self.class::OPERAND.add_to(self, params, active_opts)
|
23
18
|
end
|
24
19
|
end
|
25
20
|
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods; end # filled in ./methods/*.rb
|
4
|
+
|
5
|
+
def self.included(mod)
|
6
|
+
mod.class_eval do
|
7
|
+
extend Shared::ClassMethods
|
8
|
+
|
9
|
+
attr_accessor :type, :token, :text, :ts, :te,
|
10
|
+
:level, :set_level, :conditional_level,
|
11
|
+
:options, :parent,
|
12
|
+
:custom_to_s_handling, :pre_quantifier_decorations
|
13
|
+
|
14
|
+
attr_reader :nesting_level, :quantifier
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def init_from_token_and_options(token, options = {})
|
19
|
+
self.type = token.type
|
20
|
+
self.token = token.token
|
21
|
+
self.text = token.text
|
22
|
+
self.ts = token.ts
|
23
|
+
self.te = token.te
|
24
|
+
self.level = token.level
|
25
|
+
self.set_level = token.set_level
|
26
|
+
self.conditional_level = token.conditional_level
|
27
|
+
self.nesting_level = 0
|
28
|
+
self.options = options || {}
|
29
|
+
end
|
30
|
+
private :init_from_token_and_options
|
31
|
+
|
32
|
+
def initialize_copy(orig)
|
33
|
+
self.text = orig.text.dup if orig.text
|
34
|
+
self.options = orig.options.dup if orig.options
|
35
|
+
self.quantifier = orig.quantifier.clone if orig.quantifier
|
36
|
+
self.parent = nil # updated by Subexpression#initialize_copy
|
37
|
+
if orig.pre_quantifier_decorations
|
38
|
+
self.pre_quantifier_decorations = orig.pre_quantifier_decorations.map(&:dup)
|
39
|
+
end
|
40
|
+
super
|
41
|
+
end
|
42
|
+
|
43
|
+
def starts_at
|
44
|
+
ts
|
45
|
+
end
|
46
|
+
|
47
|
+
def ends_at(include_quantifier = true)
|
48
|
+
ts + (include_quantifier ? full_length : base_length)
|
49
|
+
end
|
50
|
+
|
51
|
+
def base_length
|
52
|
+
to_s(:base).length
|
53
|
+
end
|
54
|
+
|
55
|
+
def full_length
|
56
|
+
to_s(:original).length
|
57
|
+
end
|
58
|
+
|
59
|
+
# #to_s reproduces the original source, as an unparser would.
|
60
|
+
#
|
61
|
+
# It takes an optional format argument.
|
62
|
+
#
|
63
|
+
# Example:
|
64
|
+
#
|
65
|
+
# lit = Regexp::Parser.parse(/a +/x)[0]
|
66
|
+
#
|
67
|
+
# lit.to_s # => 'a+' # default; with quantifier
|
68
|
+
# lit.to_s(:full) # => 'a+' # default; with quantifier
|
69
|
+
# lit.to_s(:base) # => 'a' # without quantifier
|
70
|
+
# lit.to_s(:original) # => 'a +' # with quantifier AND intermittent decorations
|
71
|
+
#
|
72
|
+
def to_s(format = :full)
|
73
|
+
base = parts.each_with_object(''.dup) do |part, buff|
|
74
|
+
if part.instance_of?(String)
|
75
|
+
buff << part
|
76
|
+
elsif !part.custom_to_s_handling
|
77
|
+
buff << part.to_s(:original)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
"#{base}#{pre_quantifier_decoration(format)}#{quantifier_affix(format)}"
|
81
|
+
end
|
82
|
+
alias :to_str :to_s
|
83
|
+
|
84
|
+
def pre_quantifier_decoration(expression_format = :original)
|
85
|
+
pre_quantifier_decorations.to_a.join if expression_format == :original
|
86
|
+
end
|
87
|
+
|
88
|
+
def quantifier_affix(expression_format = :full)
|
89
|
+
quantifier.to_s if quantified? && expression_format != :base
|
90
|
+
end
|
91
|
+
|
92
|
+
def offset
|
93
|
+
[starts_at, full_length]
|
94
|
+
end
|
95
|
+
|
96
|
+
def coded_offset
|
97
|
+
'@%d+%d' % offset
|
98
|
+
end
|
99
|
+
|
100
|
+
def nesting_level=(lvl)
|
101
|
+
@nesting_level = lvl
|
102
|
+
quantifier && quantifier.nesting_level = lvl
|
103
|
+
terminal? || each { |subexp| subexp.nesting_level = lvl + 1 }
|
104
|
+
end
|
105
|
+
|
106
|
+
def quantifier=(qtf)
|
107
|
+
@quantifier = qtf
|
108
|
+
@repetitions = nil # clear memoized value
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -1,29 +1,25 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
6
5
|
attr_accessor :expressions
|
7
6
|
|
8
7
|
def initialize(token, options = {})
|
9
|
-
super
|
10
|
-
|
11
8
|
self.expressions = []
|
9
|
+
super
|
12
10
|
end
|
13
11
|
|
14
12
|
# Override base method to clone the expressions as well.
|
15
|
-
def
|
16
|
-
self.expressions = orig.expressions.map
|
13
|
+
def initialize_copy(orig)
|
14
|
+
self.expressions = orig.expressions.map do |exp|
|
15
|
+
exp.clone.tap { |copy| copy.parent = self }
|
16
|
+
end
|
17
17
|
super
|
18
18
|
end
|
19
19
|
|
20
20
|
def <<(exp)
|
21
|
-
|
22
|
-
|
23
|
-
else
|
24
|
-
exp.nesting_level = nesting_level + 1
|
25
|
-
expressions << exp
|
26
|
-
end
|
21
|
+
exp.parent = self
|
22
|
+
expressions << exp
|
27
23
|
end
|
28
24
|
|
29
25
|
%w[[] at each empty? fetch index join last length values_at].each do |method|
|
@@ -41,19 +37,31 @@ module Regexp::Expression
|
|
41
37
|
end
|
42
38
|
|
43
39
|
def te
|
44
|
-
ts +
|
45
|
-
end
|
46
|
-
|
47
|
-
def to_s(format = :full)
|
48
|
-
# Note: the format does not get passed down to subexpressions.
|
49
|
-
"#{expressions.join}#{quantifier_affix(format)}"
|
40
|
+
ts + base_length
|
50
41
|
end
|
51
42
|
|
52
43
|
def to_h
|
53
|
-
attributes.merge(
|
44
|
+
attributes.merge(
|
54
45
|
text: to_s(:base),
|
55
46
|
expressions: expressions.map(&:to_h)
|
56
|
-
|
47
|
+
)
|
48
|
+
end
|
49
|
+
|
50
|
+
def extract_quantifier_target(quantifier_description)
|
51
|
+
pre_quantifier_decorations = []
|
52
|
+
target = expressions.reverse.find do |exp|
|
53
|
+
if exp.decorative?
|
54
|
+
exp.custom_to_s_handling = true
|
55
|
+
pre_quantifier_decorations << exp.text
|
56
|
+
next
|
57
|
+
end
|
58
|
+
exp
|
59
|
+
end
|
60
|
+
target or raise Regexp::Parser::ParserError,
|
61
|
+
"No valid target found for '#{quantifier_description}' quantifier"
|
62
|
+
|
63
|
+
target.pre_quantifier_decorations = pre_quantifier_decorations
|
64
|
+
target
|
57
65
|
end
|
58
66
|
end
|
59
67
|
end
|
@@ -1,138 +1,7 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class Base
|
4
|
-
attr_accessor :type, :token
|
5
|
-
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
|
-
|
8
|
-
attr_accessor :quantifier
|
9
|
-
attr_accessor :options
|
10
|
-
|
11
|
-
def initialize(token, options = {})
|
12
|
-
self.type = token.type
|
13
|
-
self.token = token.token
|
14
|
-
self.text = token.text
|
15
|
-
self.ts = token.ts
|
16
|
-
self.level = token.level
|
17
|
-
self.set_level = token.set_level
|
18
|
-
self.conditional_level = token.conditional_level
|
19
|
-
self.nesting_level = 0
|
20
|
-
self.quantifier = nil
|
21
|
-
self.options = options
|
22
|
-
end
|
23
|
-
|
24
|
-
def initialize_clone(orig)
|
25
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
26
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
27
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
28
|
-
super
|
29
|
-
end
|
30
|
-
|
31
|
-
def to_re(format = :full)
|
32
|
-
::Regexp.new(to_s(format))
|
33
|
-
end
|
34
|
-
|
35
|
-
alias :starts_at :ts
|
36
|
-
|
37
|
-
def full_length
|
38
|
-
to_s.length
|
39
|
-
end
|
40
|
-
|
41
|
-
def offset
|
42
|
-
[starts_at, full_length]
|
43
|
-
end
|
44
|
-
|
45
|
-
def coded_offset
|
46
|
-
'@%d+%d' % offset
|
47
|
-
end
|
48
|
-
|
49
|
-
def to_s(format = :full)
|
50
|
-
"#{text}#{quantifier_affix(format)}"
|
51
|
-
end
|
52
|
-
|
53
|
-
def quantifier_affix(expression_format)
|
54
|
-
quantifier.to_s if quantified? && expression_format != :base
|
55
|
-
end
|
56
|
-
|
57
|
-
def terminal?
|
58
|
-
!respond_to?(:expressions)
|
59
|
-
end
|
60
|
-
|
61
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
62
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
63
|
-
end
|
64
|
-
|
65
|
-
def unquantified_clone
|
66
|
-
clone.tap { |exp| exp.quantifier = nil }
|
67
|
-
end
|
68
|
-
|
69
|
-
def quantified?
|
70
|
-
!quantifier.nil?
|
71
|
-
end
|
72
|
-
|
73
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
74
|
-
def quantity
|
75
|
-
return [nil,nil] unless quantified?
|
76
|
-
[quantifier.min, quantifier.max]
|
77
|
-
end
|
78
|
-
|
79
|
-
def repetitions
|
80
|
-
return 1..1 unless quantified?
|
81
|
-
min = quantifier.min
|
82
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
83
|
-
# fix Range#minmax - https://bugs.ruby-lang.org/issues/15807
|
84
|
-
(min..max).tap { |r| r.define_singleton_method(:minmax) { [min, max] } }
|
85
|
-
end
|
86
|
-
|
87
|
-
def greedy?
|
88
|
-
quantified? and quantifier.greedy?
|
89
|
-
end
|
90
|
-
|
91
|
-
def reluctant?
|
92
|
-
quantified? and quantifier.reluctant?
|
93
|
-
end
|
94
|
-
alias :lazy? :reluctant?
|
95
|
-
|
96
|
-
def possessive?
|
97
|
-
quantified? and quantifier.possessive?
|
98
|
-
end
|
99
|
-
|
100
|
-
def attributes
|
101
|
-
{
|
102
|
-
type: type,
|
103
|
-
token: token,
|
104
|
-
text: to_s(:base),
|
105
|
-
starts_at: ts,
|
106
|
-
length: full_length,
|
107
|
-
level: level,
|
108
|
-
set_level: set_level,
|
109
|
-
conditional_level: conditional_level,
|
110
|
-
options: options,
|
111
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
112
|
-
}
|
113
|
-
end
|
114
|
-
alias :to_h :attributes
|
115
|
-
end
|
116
|
-
|
117
|
-
def self.parsed(exp)
|
118
|
-
warn('WARNING: Regexp::Expression::Base.parsed is buggy and '\
|
119
|
-
'will be removed in 2.0.0. Use Regexp::Parser.parse instead.')
|
120
|
-
case exp
|
121
|
-
when String
|
122
|
-
Regexp::Parser.parse(exp)
|
123
|
-
when Regexp
|
124
|
-
Regexp::Parser.parse(exp.source) # <- causes loss of root options
|
125
|
-
when Regexp::Expression # <- never triggers
|
126
|
-
exp
|
127
|
-
else
|
128
|
-
raise ArgumentError, 'Expression.parsed accepts a String, Regexp, or '\
|
129
|
-
'a Regexp::Expression as a value for exp, but it '\
|
130
|
-
"was given #{exp.class.name}."
|
131
|
-
end
|
132
|
-
end
|
133
|
-
|
134
|
-
end # module Regexp::Expression
|
1
|
+
require 'regexp_parser/error'
|
135
2
|
|
3
|
+
require 'regexp_parser/expression/shared'
|
4
|
+
require 'regexp_parser/expression/base'
|
136
5
|
require 'regexp_parser/expression/quantifier'
|
137
6
|
require 'regexp_parser/expression/subexpression'
|
138
7
|
require 'regexp_parser/expression/sequence'
|
@@ -140,24 +9,28 @@ require 'regexp_parser/expression/sequence_operation'
|
|
140
9
|
|
141
10
|
require 'regexp_parser/expression/classes/alternation'
|
142
11
|
require 'regexp_parser/expression/classes/anchor'
|
143
|
-
require 'regexp_parser/expression/classes/
|
12
|
+
require 'regexp_parser/expression/classes/backreference'
|
13
|
+
require 'regexp_parser/expression/classes/character_set'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
15
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
16
|
+
require 'regexp_parser/expression/classes/character_type'
|
144
17
|
require 'regexp_parser/expression/classes/conditional'
|
145
|
-
require 'regexp_parser/expression/classes/
|
18
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
146
19
|
require 'regexp_parser/expression/classes/free_space'
|
147
20
|
require 'regexp_parser/expression/classes/group'
|
148
21
|
require 'regexp_parser/expression/classes/keep'
|
149
22
|
require 'regexp_parser/expression/classes/literal'
|
150
23
|
require 'regexp_parser/expression/classes/posix_class'
|
151
|
-
require 'regexp_parser/expression/classes/property'
|
152
24
|
require 'regexp_parser/expression/classes/root'
|
153
|
-
require 'regexp_parser/expression/classes/
|
154
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
155
|
-
require 'regexp_parser/expression/classes/set/range'
|
156
|
-
require 'regexp_parser/expression/classes/type'
|
25
|
+
require 'regexp_parser/expression/classes/unicode_property'
|
157
26
|
|
27
|
+
require 'regexp_parser/expression/methods/construct'
|
28
|
+
require 'regexp_parser/expression/methods/human_name'
|
158
29
|
require 'regexp_parser/expression/methods/match'
|
159
30
|
require 'regexp_parser/expression/methods/match_length'
|
160
31
|
require 'regexp_parser/expression/methods/options'
|
32
|
+
require 'regexp_parser/expression/methods/parts'
|
33
|
+
require 'regexp_parser/expression/methods/printing'
|
161
34
|
require 'regexp_parser/expression/methods/strfregexp'
|
162
35
|
require 'regexp_parser/expression/methods/tests'
|
163
36
|
require 'regexp_parser/expression/methods/traverse'
|