regexp_parser 1.7.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +364 -22
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/README.md +124 -88
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +295 -368
- data/lib/regexp_parser/scanner.rb +1405 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +49 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
@@ -1,23 +1,19 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
|
-
complete?
|
9
|
+
complete? and raise Regexp::Parser::Error,
|
10
|
+
"Can't add more than 2 expressions to a Range"
|
11
11
|
super
|
12
12
|
end
|
13
13
|
|
14
14
|
def complete?
|
15
15
|
count == 2
|
16
16
|
end
|
17
|
-
|
18
|
-
def to_s(_format = :full)
|
19
|
-
expressions.join(text)
|
20
|
-
end
|
21
17
|
end
|
22
18
|
end
|
23
19
|
end
|
@@ -19,9 +19,8 @@ module Regexp::Expression
|
|
19
19
|
def close
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
|
-
|
23
|
-
def to_s(format = :full)
|
24
|
-
"#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
|
25
|
-
end
|
26
22
|
end
|
23
|
+
|
24
|
+
# alias for symmetry between token symbol and Expression class name
|
25
|
+
Set = CharacterSet
|
27
26
|
end # module Regexp::Expression
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Conditional
|
3
|
-
class TooManyBranches <
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
4
4
|
def initialize
|
5
5
|
super('The conditional expression has more than 2 branches')
|
6
6
|
end
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
15
15
|
ref = text.tr("'<>()", "")
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
17
|
end
|
18
|
+
|
19
|
+
def initialize_copy(orig)
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
21
|
+
super
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -26,9 +31,9 @@ module Regexp::Expression
|
|
26
31
|
expressions.last << exp
|
27
32
|
end
|
28
33
|
|
29
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
30
35
|
raise TooManyBranches.new if branches.length == 2
|
31
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
32
37
|
Branch.add_to(self, params, active_opts)
|
33
38
|
end
|
34
39
|
alias :branch :add_sequence
|
@@ -50,8 +55,9 @@ module Regexp::Expression
|
|
50
55
|
condition.reference
|
51
56
|
end
|
52
57
|
|
53
|
-
def
|
54
|
-
|
58
|
+
def initialize_copy(orig)
|
59
|
+
self.referenced_expression = orig.referenced_expression.dup
|
60
|
+
super
|
55
61
|
end
|
56
62
|
end
|
57
63
|
end
|
@@ -1,16 +1,21 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module EscapeSequence
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
def char
|
7
|
-
# poor man's unescape without using eval
|
8
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
9
|
-
end
|
10
|
-
|
11
4
|
def codepoint
|
12
5
|
char.ord
|
13
6
|
end
|
7
|
+
|
8
|
+
if ''.respond_to?(:undump)
|
9
|
+
def char
|
10
|
+
%("#{text}").undump
|
11
|
+
end
|
12
|
+
else
|
13
|
+
# poor man's unescape without using eval
|
14
|
+
require 'yaml'
|
15
|
+
def char
|
16
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
17
|
+
end
|
18
|
+
end
|
14
19
|
end
|
15
20
|
|
16
21
|
class Literal < EscapeSequence::Base
|
@@ -91,4 +96,7 @@ module Regexp::Expression
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
94
102
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
|
-
def quantify(
|
5
|
-
raise
|
3
|
+
def quantify(*_args)
|
4
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
5
|
end
|
7
6
|
end
|
8
7
|
|
9
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
10
10
|
|
11
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
12
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
13
14
|
text << exp.text
|
14
15
|
end
|
15
16
|
end
|
16
|
-
|
17
17
|
end
|
@@ -1,27 +1,45 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
"#{text}#{expressions.join})#{quantifier_affix(format)}"
|
6
|
-
end
|
4
|
+
end
|
7
5
|
|
8
|
-
|
6
|
+
class Passive < Group::Base
|
7
|
+
attr_writer :implicit
|
8
|
+
|
9
|
+
def initialize(*)
|
10
|
+
@implicit = false
|
11
|
+
super
|
12
|
+
end
|
9
13
|
|
10
|
-
def
|
14
|
+
def implicit?
|
15
|
+
@implicit
|
16
|
+
end
|
11
17
|
end
|
12
18
|
|
13
|
-
class Atomic < Group::Base; end
|
14
|
-
class Passive < Group::Base; end
|
15
19
|
class Absence < Group::Base; end
|
20
|
+
class Atomic < Group::Base; end
|
21
|
+
# TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
|
22
|
+
# longer inherit from Group because it is effectively a terminal expression.
|
16
23
|
class Options < Group::Base
|
17
24
|
attr_accessor :option_changes
|
25
|
+
|
26
|
+
def initialize_copy(orig)
|
27
|
+
self.option_changes = orig.option_changes.dup
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
def quantify(*args)
|
32
|
+
if token == :options_switch
|
33
|
+
raise Regexp::Parser::Error, 'Can not quantify an option switch'
|
34
|
+
else
|
35
|
+
super
|
36
|
+
end
|
37
|
+
end
|
18
38
|
end
|
19
39
|
|
20
40
|
class Capture < Group::Base
|
21
41
|
attr_accessor :number, :number_at_level
|
22
42
|
alias identifier number
|
23
|
-
|
24
|
-
def capturing?; true end
|
25
43
|
end
|
26
44
|
|
27
45
|
class Named < Group::Capture
|
@@ -33,18 +51,13 @@ module Regexp::Expression
|
|
33
51
|
super
|
34
52
|
end
|
35
53
|
|
36
|
-
def
|
54
|
+
def initialize_copy(orig)
|
37
55
|
@name = orig.name.dup
|
38
56
|
super
|
39
57
|
end
|
40
58
|
end
|
41
59
|
|
42
60
|
class Comment < Group::Base
|
43
|
-
def to_s(_format = :full)
|
44
|
-
text.dup
|
45
|
-
end
|
46
|
-
|
47
|
-
def comment?; true end
|
48
61
|
end
|
49
62
|
end
|
50
63
|
|
@@ -1,24 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
'will be removed in 2.0.0. Use Root.build for the old behavior.')
|
9
|
-
return super(self.class.build_token, *args)
|
10
|
-
end
|
11
|
-
super
|
12
|
-
end
|
13
|
-
|
14
|
-
class << self
|
15
|
-
def build(options = {})
|
16
|
-
new(build_token, options)
|
17
|
-
end
|
18
|
-
|
19
|
-
def build_token
|
20
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
21
|
-
end
|
3
|
+
def self.build(options = {})
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
22
7
|
end
|
23
8
|
end
|
24
9
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -7,11 +6,11 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
|
9
8
|
def name
|
10
|
-
text
|
9
|
+
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
14
|
-
|
13
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
14
|
end
|
16
15
|
end
|
17
16
|
|
@@ -117,4 +116,7 @@ module Regexp::Expression
|
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
118
|
|
119
|
+
# alias for symmetry between token symbol and Expression class name
|
120
|
+
Property = UnicodeProperty
|
121
|
+
Nonproperty = UnicodeProperty
|
120
122
|
end # module Regexp::Expression
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
else
|
32
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def token_class
|
38
|
+
self.class.token_class
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation, e.g. "atomic group", "hex escape", "word type", ..
|
4
|
+
def human_name
|
5
|
+
[token, type].compact.join(' ').tr('_', ' ')
|
6
|
+
end
|
7
|
+
end
|
8
|
+
|
9
|
+
Alternation.class_eval { def human_name; 'alternation' end }
|
10
|
+
Alternative.class_eval { def human_name; 'alternative' end }
|
11
|
+
Anchor::BOL.class_eval { def human_name; 'beginning of line' end }
|
12
|
+
Anchor::BOS.class_eval { def human_name; 'beginning of string' end }
|
13
|
+
Anchor::EOL.class_eval { def human_name; 'end of line' end }
|
14
|
+
Anchor::EOS.class_eval { def human_name; 'end of string' end }
|
15
|
+
Anchor::EOSobEOL.class_eval { def human_name; 'newline-ready end of string' end }
|
16
|
+
Anchor::MatchStart.class_eval { def human_name; 'match start' end }
|
17
|
+
Anchor::NonWordBoundary.class_eval { def human_name; 'no word boundary' end }
|
18
|
+
Anchor::WordBoundary.class_eval { def human_name; 'word boundary' end }
|
19
|
+
Assertion::Lookahead.class_eval { def human_name; 'lookahead' end }
|
20
|
+
Assertion::Lookbehind.class_eval { def human_name; 'lookbehind' end }
|
21
|
+
Assertion::NegativeLookahead.class_eval { def human_name; 'negative lookahead' end }
|
22
|
+
Assertion::NegativeLookbehind.class_eval { def human_name; 'negative lookbehind' end }
|
23
|
+
Backreference::Name.class_eval { def human_name; 'backreference by name' end }
|
24
|
+
Backreference::NameCall.class_eval { def human_name; 'subexpression call by name' end }
|
25
|
+
Backreference::Number.class_eval { def human_name; 'backreference' end }
|
26
|
+
Backreference::NumberRelative.class_eval { def human_name; 'relative backreference' end }
|
27
|
+
Backreference::NumberCall.class_eval { def human_name; 'subexpression call' end }
|
28
|
+
Backreference::NumberCallRelative.class_eval { def human_name; 'relative subexpression call' end }
|
29
|
+
CharacterSet::IntersectedSequence.class_eval { def human_name; 'intersected sequence' end }
|
30
|
+
CharacterSet::Intersection.class_eval { def human_name; 'intersection' end }
|
31
|
+
CharacterSet::Range.class_eval { def human_name; 'character range' end }
|
32
|
+
CharacterType::Any.class_eval { def human_name; 'match-all' end }
|
33
|
+
Comment.class_eval { def human_name; 'comment' end }
|
34
|
+
Conditional::Branch.class_eval { def human_name; 'conditional branch' end }
|
35
|
+
Conditional::Condition.class_eval { def human_name; 'condition' end }
|
36
|
+
Conditional::Expression.class_eval { def human_name; 'conditional' end }
|
37
|
+
Group::Capture.class_eval { def human_name; "capture group #{number}" end }
|
38
|
+
Group::Named.class_eval { def human_name; 'named capture group' end }
|
39
|
+
Keep::Mark.class_eval { def human_name; 'keep-mark lookbehind' end }
|
40
|
+
Literal.class_eval { def human_name; 'literal' end }
|
41
|
+
Root.class_eval { def human_name; 'root' end }
|
42
|
+
WhiteSpace.class_eval { def human_name; 'free space' end }
|
43
|
+
end
|
@@ -10,7 +10,7 @@ class Regexp::MatchLength
|
|
10
10
|
self.exp_class = exp.class
|
11
11
|
self.min_rep = exp.repetitions.min
|
12
12
|
self.max_rep = exp.repetitions.max
|
13
|
-
if base = opts[:base]
|
13
|
+
if (base = opts[:base])
|
14
14
|
self.base_min = base
|
15
15
|
self.base_max = base
|
16
16
|
self.reify = ->{ '.' * base }
|
@@ -32,7 +32,7 @@ class Regexp::MatchLength
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
def endless_each
|
35
|
+
def endless_each
|
36
36
|
return enum_for(__method__) unless block_given?
|
37
37
|
(min..max).each { |num| yield(num) if include?(num) }
|
38
38
|
end
|
@@ -63,16 +63,20 @@ class Regexp::MatchLength
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def to_re
|
66
|
-
|
66
|
+
/(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
70
70
|
|
71
71
|
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
if Regexp.method_defined?(:match?) # ruby >= 2.4
|
74
|
+
def test_regexp
|
75
|
+
@test_regexp ||= /^#{to_re}$/
|
76
|
+
end
|
77
|
+
else
|
78
|
+
def test_regexp
|
79
|
+
@test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -112,7 +116,7 @@ module Regexp::Expression
|
|
112
116
|
end
|
113
117
|
|
114
118
|
def inner_match_length
|
115
|
-
dummy = Regexp::Expression::Root.
|
119
|
+
dummy = Regexp::Expression::Root.construct
|
116
120
|
dummy.expressions = expressions.map(&:clone)
|
117
121
|
dummy.quantifier = quantifier && quantifier.clone
|
118
122
|
dummy.match_length
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
2
|
+
module Shared
|
3
3
|
|
4
4
|
# Test if this expression has the given test_type, which can be either
|
5
5
|
# a symbol or an array of symbols to check against the expression's type.
|
@@ -93,5 +93,51 @@ module Regexp::Expression
|
|
93
93
|
"Array, Hash, or Symbol expected, #{scope.class.name} given"
|
94
94
|
end
|
95
95
|
end
|
96
|
+
|
97
|
+
# Deep-compare two expressions for equality.
|
98
|
+
#
|
99
|
+
# When changing the conditions, please make sure to update
|
100
|
+
# #pretty_print_instance_variables so that it includes all relevant values.
|
101
|
+
def ==(other)
|
102
|
+
self.class == other.class &&
|
103
|
+
text == other.text &&
|
104
|
+
quantifier == other.quantifier &&
|
105
|
+
options == other.options &&
|
106
|
+
(terminal? || expressions == other.expressions)
|
107
|
+
end
|
108
|
+
alias :=== :==
|
109
|
+
alias :eql? :==
|
110
|
+
|
111
|
+
def optional?
|
112
|
+
quantified? && quantifier.min == 0
|
113
|
+
end
|
114
|
+
|
115
|
+
def quantified?
|
116
|
+
!quantifier.nil?
|
117
|
+
end
|
96
118
|
end
|
119
|
+
|
120
|
+
Shared.class_eval { def terminal?; self.class.terminal? end }
|
121
|
+
Shared::ClassMethods.class_eval { def terminal?; true end }
|
122
|
+
Subexpression.instance_eval { def terminal?; false end }
|
123
|
+
|
124
|
+
Shared.class_eval { def capturing?; self.class.capturing? end }
|
125
|
+
Shared::ClassMethods.class_eval { def capturing?; false end }
|
126
|
+
Group::Capture.instance_eval { def capturing?; true end }
|
127
|
+
|
128
|
+
Shared.class_eval { def comment?; self.class.comment? end }
|
129
|
+
Shared::ClassMethods.class_eval { def comment?; false end }
|
130
|
+
Comment.instance_eval { def comment?; true end }
|
131
|
+
Group::Comment.instance_eval { def comment?; true end }
|
132
|
+
|
133
|
+
Shared.class_eval { def decorative?; self.class.decorative? end }
|
134
|
+
Shared::ClassMethods.class_eval { def decorative?; false end }
|
135
|
+
FreeSpace.instance_eval { def decorative?; true end }
|
136
|
+
Group::Comment.instance_eval { def decorative?; true end }
|
137
|
+
|
138
|
+
Shared.class_eval { def referential?; self.class.referential? end }
|
139
|
+
Shared::ClassMethods.class_eval { def referential?; false end }
|
140
|
+
Backreference::Base.instance_eval { def referential?; true end }
|
141
|
+
Conditional::Condition.instance_eval { def referential?; true end }
|
142
|
+
Conditional::Expression.instance_eval { def referential?; true end }
|
97
143
|
end
|
@@ -1,6 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Subexpression < Regexp::Expression::Base
|
3
3
|
|
4
|
+
# Traverses the expression, passing each recursive child to the
|
5
|
+
# given block.
|
6
|
+
# If the block takes two arguments, the indices of the children within
|
7
|
+
# their parents are also passed to it.
|
8
|
+
def each_expression(include_self = false, &block)
|
9
|
+
return enum_for(__method__, include_self) unless block
|
10
|
+
|
11
|
+
if block.arity == 1
|
12
|
+
block.call(self) if include_self
|
13
|
+
each_expression_without_index(&block)
|
14
|
+
else
|
15
|
+
block.call(self, 0) if include_self
|
16
|
+
each_expression_with_index(&block)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
4
20
|
# Traverses the subexpression (depth-first, pre-order) and calls the given
|
5
21
|
# block for each expression with three arguments; the traversal event,
|
6
22
|
# the expression, and the index of the expression within its parent.
|
@@ -34,31 +50,31 @@ module Regexp::Expression
|
|
34
50
|
end
|
35
51
|
alias :walk :traverse
|
36
52
|
|
37
|
-
# Iterates over the expressions of this expression as an array, passing
|
38
|
-
# the expression and its index within its parent to the given block.
|
39
|
-
def each_expression(include_self = false, &block)
|
40
|
-
return enum_for(__method__, include_self) unless block_given?
|
41
|
-
|
42
|
-
traverse(include_self) do |event, exp, index|
|
43
|
-
yield(exp, index) unless event == :exit
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
53
|
# Returns a new array with the results of calling the given block once
|
48
54
|
# for every expression. If a block is not given, returns an array with
|
49
55
|
# each expression and its level index as an array.
|
50
56
|
def flat_map(include_self = false, &block)
|
51
|
-
|
57
|
+
case block && block.arity
|
58
|
+
when nil then each_expression(include_self).to_a
|
59
|
+
when 2 then each_expression(include_self).map(&block)
|
60
|
+
else each_expression(include_self).map { |exp| block.call(exp) }
|
61
|
+
end
|
62
|
+
end
|
52
63
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
64
|
+
protected
|
65
|
+
|
66
|
+
def each_expression_with_index(&block)
|
67
|
+
each_with_index do |exp, index|
|
68
|
+
block.call(exp, index)
|
69
|
+
exp.each_expression_with_index(&block) unless exp.terminal?
|
59
70
|
end
|
71
|
+
end
|
60
72
|
|
61
|
-
|
73
|
+
def each_expression_without_index(&block)
|
74
|
+
each do |exp|
|
75
|
+
block.call(exp)
|
76
|
+
exp.each_expression_without_index(&block) unless exp.terminal?
|
77
|
+
end
|
62
78
|
end
|
63
79
|
end
|
64
80
|
end
|