regexp_parser 2.6.0 → 2.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +5 -10
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -20
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +21 -91
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -10
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -20
- data/lib/regexp_parser/expression/subexpression.rb +20 -15
- data/lib/regexp_parser/expression.rb +37 -31
- data/lib/regexp_parser/lexer.rb +76 -36
- data/lib/regexp_parser/parser.rb +107 -103
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +101 -172
- data/lib/regexp_parser/scanner.rb +1171 -1365
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +17 -8
- data/CHANGELOG.md +0 -601
- data/README.md +0 -503
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
|
4
|
+
data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
|
7
|
+
data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
|
-
gem 'gouteur'
|
13
|
-
gem 'rubocop', '~> 1.
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
@@ -6,13 +6,6 @@ module Regexp::Expression
|
|
6
6
|
init_from_token_and_options(token, options)
|
7
7
|
end
|
8
8
|
|
9
|
-
def initialize_copy(orig)
|
10
|
-
self.text = orig.text.dup if orig.text
|
11
|
-
self.options = orig.options.dup if orig.options
|
12
|
-
self.quantifier = orig.quantifier.clone if orig.quantifier
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
9
|
def to_re(format = :full)
|
17
10
|
if set_level > 0
|
18
11
|
warn "Calling #to_re on character set members is deprecated - "\
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -1,21 +1,13 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
3
2
|
module Backreference
|
4
|
-
class Base < Regexp::Expression::Base
|
5
|
-
attr_accessor :referenced_expression
|
6
|
-
|
7
|
-
def initialize_copy(orig)
|
8
|
-
self.referenced_expression = orig.referenced_expression.dup
|
9
|
-
super
|
10
|
-
end
|
11
|
-
end
|
3
|
+
class Base < Regexp::Expression::Base; end
|
12
4
|
|
13
5
|
class Number < Backreference::Base
|
14
6
|
attr_reader :number
|
15
7
|
alias reference number
|
16
8
|
|
17
9
|
def initialize(token, options = {})
|
18
|
-
@number = token.text[
|
10
|
+
@number = token.text[/-?\d+/].to_i
|
19
11
|
super
|
20
12
|
end
|
21
13
|
end
|
@@ -58,4 +50,7 @@ module Regexp::Expression
|
|
58
50
|
end
|
59
51
|
end
|
60
52
|
end
|
53
|
+
|
54
|
+
# alias for symmetry between token symbol and Expression class name
|
55
|
+
Backref = Backreference
|
61
56
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
9
|
complete? and raise Regexp::Parser::Error,
|
@@ -15,10 +14,6 @@ module Regexp::Expression
|
|
15
14
|
def complete?
|
16
15
|
count == 2
|
17
16
|
end
|
18
|
-
|
19
|
-
def parts
|
20
|
-
intersperse(expressions, text.dup)
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
@@ -19,9 +16,8 @@ module Regexp::Expression
|
|
19
16
|
def close
|
20
17
|
self.closed = true
|
21
18
|
end
|
22
|
-
|
23
|
-
def parts
|
24
|
-
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
|
-
end
|
26
19
|
end
|
20
|
+
|
21
|
+
# alias for symmetry between token symbol and Expression class name
|
22
|
+
Set = CharacterSet
|
27
23
|
end # module Regexp::Expression
|
@@ -7,33 +7,24 @@ module Regexp::Expression
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class Condition < Regexp::Expression::Base
|
10
|
-
attr_accessor :referenced_expression
|
11
|
-
|
12
10
|
# Name or number of the referenced capturing group that determines state.
|
13
11
|
# Returns a String if reference is by name, Integer if by number.
|
14
12
|
def reference
|
15
13
|
ref = text.tr("'<>()", "")
|
16
14
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
15
|
end
|
18
|
-
|
19
|
-
def initialize_copy(orig)
|
20
|
-
self.referenced_expression = orig.referenced_expression.dup
|
21
|
-
super
|
22
|
-
end
|
23
16
|
end
|
24
17
|
|
25
18
|
class Branch < Regexp::Expression::Sequence; end
|
26
19
|
|
27
20
|
class Expression < Regexp::Expression::Subexpression
|
28
|
-
attr_accessor :referenced_expression
|
29
|
-
|
30
21
|
def <<(exp)
|
31
22
|
expressions.last << exp
|
32
23
|
end
|
33
24
|
|
34
|
-
def add_sequence(active_opts = {})
|
25
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
35
26
|
raise TooManyBranches.new if branches.length == 2
|
36
|
-
params = { conditional_level: conditional_level + 1 }
|
27
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
37
28
|
Branch.add_to(self, params, active_opts)
|
38
29
|
end
|
39
30
|
alias :branch :add_sequence
|
@@ -54,15 +45,6 @@ module Regexp::Expression
|
|
54
45
|
def reference
|
55
46
|
condition.reference
|
56
47
|
end
|
57
|
-
|
58
|
-
def parts
|
59
|
-
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
|
-
end
|
61
|
-
|
62
|
-
def initialize_copy(orig)
|
63
|
-
self.referenced_expression = orig.referenced_expression.dup
|
64
|
-
super
|
65
|
-
end
|
66
48
|
end
|
67
49
|
end
|
68
50
|
end
|
@@ -1,100 +1,30 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
2
|
module EscapeSequence
|
4
|
-
|
5
|
-
def codepoint
|
6
|
-
char.ord
|
7
|
-
end
|
3
|
+
Base = Class.new(Regexp::Expression::Base)
|
8
4
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
5
|
+
AsciiEscape = Class.new(Base) # \e
|
6
|
+
Backspace = Class.new(Base) # \b
|
7
|
+
Bell = Class.new(Base) # \a
|
8
|
+
FormFeed = Class.new(Base) # \f
|
9
|
+
Newline = Class.new(Base) # \n
|
10
|
+
Return = Class.new(Base) # \r
|
11
|
+
Tab = Class.new(Base) # \t
|
12
|
+
VerticalTab = Class.new(Base) # \v
|
21
13
|
|
22
|
-
|
23
|
-
def char
|
24
|
-
text[1..-1]
|
25
|
-
end
|
26
|
-
end
|
14
|
+
Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
|
27
15
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
class FormFeed < EscapeSequence::Base; end
|
32
|
-
class Newline < EscapeSequence::Base; end
|
33
|
-
class Return < EscapeSequence::Base; end
|
34
|
-
class Tab < EscapeSequence::Base; end
|
35
|
-
class VerticalTab < EscapeSequence::Base; end
|
16
|
+
Octal = Class.new(Base) # e.g. \012
|
17
|
+
Hex = Class.new(Base) # e.g. \x0A
|
18
|
+
Codepoint = Class.new(Base) # e.g. \u000A
|
36
19
|
|
37
|
-
|
38
|
-
class Codepoint < EscapeSequence::Base; end
|
20
|
+
CodepointList = Class.new(Base) # e.g. \u{A B}
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
def codepoint
|
46
|
-
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
47
|
-
end
|
48
|
-
|
49
|
-
def chars
|
50
|
-
codepoints.map { |cp| cp.chr('utf-8') }
|
51
|
-
end
|
52
|
-
|
53
|
-
def codepoints
|
54
|
-
text.scan(/\h+/).map(&:hex)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
class Octal < EscapeSequence::Base
|
59
|
-
def char
|
60
|
-
text[1..-1].to_i(8).chr('utf-8')
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
class AbstractMetaControlSequence < EscapeSequence::Base
|
65
|
-
def char
|
66
|
-
codepoint.chr('utf-8')
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
|
71
|
-
def control_sequence_to_s(control_sequence)
|
72
|
-
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
73
|
-
["000#{five_lsb}"].pack('B*')
|
74
|
-
end
|
75
|
-
|
76
|
-
def meta_char_to_codepoint(meta_char)
|
77
|
-
byte_value = meta_char.ord
|
78
|
-
byte_value < 128 ? byte_value + 128 : byte_value
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
class Control < AbstractMetaControlSequence
|
83
|
-
def codepoint
|
84
|
-
control_sequence_to_s(text).ord
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
class Meta < AbstractMetaControlSequence
|
89
|
-
def codepoint
|
90
|
-
meta_char_to_codepoint(text[-1])
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class MetaControl < AbstractMetaControlSequence
|
95
|
-
def codepoint
|
96
|
-
meta_char_to_codepoint(control_sequence_to_s(text))
|
97
|
-
end
|
98
|
-
end
|
22
|
+
AbstractMetaControlSequence = Class.new(Base)
|
23
|
+
Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
|
24
|
+
Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
|
25
|
+
MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
|
99
26
|
end
|
27
|
+
|
28
|
+
# alias for symmetry between Token::* and Expression::*
|
29
|
+
Escape = EscapeSequence
|
100
30
|
end
|
@@ -5,10 +5,12 @@ module Regexp::Expression
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
9
10
|
|
10
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
11
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
12
14
|
text << exp.text
|
13
15
|
end
|
14
16
|
end
|
@@ -1,13 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def parts
|
5
|
-
[text.dup, *expressions, ')']
|
6
|
-
end
|
7
|
-
|
8
|
-
def capturing?; false end
|
9
|
-
|
10
|
-
def comment?; false end
|
11
4
|
end
|
12
5
|
|
13
6
|
class Passive < Group::Base
|
@@ -18,14 +11,6 @@ module Regexp::Expression
|
|
18
11
|
super
|
19
12
|
end
|
20
13
|
|
21
|
-
def parts
|
22
|
-
if implicit?
|
23
|
-
expressions
|
24
|
-
else
|
25
|
-
super
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
14
|
def implicit?
|
30
15
|
@implicit
|
31
16
|
end
|
@@ -55,8 +40,6 @@ module Regexp::Expression
|
|
55
40
|
class Capture < Group::Base
|
56
41
|
attr_accessor :number, :number_at_level
|
57
42
|
alias identifier number
|
58
|
-
|
59
|
-
def capturing?; true end
|
60
43
|
end
|
61
44
|
|
62
45
|
class Named < Group::Capture
|
@@ -75,11 +58,6 @@ module Regexp::Expression
|
|
75
58
|
end
|
76
59
|
|
77
60
|
class Comment < Group::Base
|
78
|
-
def parts
|
79
|
-
[text.dup]
|
80
|
-
end
|
81
|
-
|
82
|
-
def comment?; true end
|
83
61
|
end
|
84
62
|
end
|
85
63
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Keep
|
3
|
-
#
|
3
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
4
4
|
# that contains all expressions to its left.
|
5
5
|
class Mark < Regexp::Expression::Base; end
|
6
6
|
end
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class PosixClass < Regexp::Expression::Base
|
3
|
-
def negative?
|
4
|
-
type == :nonposixclass
|
5
|
-
end
|
6
|
-
|
7
3
|
def name
|
8
|
-
|
4
|
+
text[/\w+/]
|
9
5
|
end
|
10
6
|
end
|
7
|
+
|
8
|
+
# alias for symmetry between token symbol and Expression class name
|
9
|
+
Posixclass = PosixClass
|
10
|
+
Nonposixclass = PosixClass
|
11
11
|
end
|
@@ -1,17 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
|
-
def negative?
|
6
|
-
type == :nonproperty
|
7
|
-
end
|
8
|
-
|
9
4
|
def name
|
10
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
6
|
end
|
12
7
|
|
13
8
|
def shortcut
|
14
|
-
|
9
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
|
@@ -110,10 +105,15 @@ module Regexp::Expression
|
|
110
105
|
class Unassigned < Codepoint::Base; end
|
111
106
|
end
|
112
107
|
|
113
|
-
class Age
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
117
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
118
114
|
end
|
115
|
+
|
116
|
+
# alias for symmetry between token symbol and Expression class name
|
117
|
+
Property = UnicodeProperty
|
118
|
+
Nonproperty = UnicodeProperty
|
119
119
|
end # module Regexp::Expression
|
@@ -25,11 +25,9 @@ module Regexp::Expression
|
|
25
25
|
def token_class
|
26
26
|
if self == Root || self < Sequence
|
27
27
|
nil # no token class because these objects are Parser-generated
|
28
|
-
# TODO: synch exp
|
29
|
-
elsif self ==
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
30
|
Regexp::Syntax::Token::Meta
|
31
|
-
elsif self <= EscapeSequence::Base
|
32
|
-
Regexp::Syntax::Token::Escape
|
33
31
|
else
|
34
32
|
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
33
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Regexp::Expression::EscapeSequence
|
2
|
+
AsciiEscape.class_eval { def codepoint; 0x1B end }
|
3
|
+
Backspace.class_eval { def codepoint; 0x8 end }
|
4
|
+
Bell.class_eval { def codepoint; 0x7 end }
|
5
|
+
FormFeed.class_eval { def codepoint; 0xC end }
|
6
|
+
Newline.class_eval { def codepoint; 0xA end }
|
7
|
+
Return.class_eval { def codepoint; 0xD end }
|
8
|
+
Tab.class_eval { def codepoint; 0x9 end }
|
9
|
+
VerticalTab.class_eval { def codepoint; 0xB end }
|
10
|
+
|
11
|
+
Literal.class_eval { def codepoint; text[1].ord end }
|
12
|
+
|
13
|
+
Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
|
14
|
+
|
15
|
+
Hex.class_eval { def codepoint; text[/\h+/].hex end }
|
16
|
+
Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
|
17
|
+
|
18
|
+
CodepointList.class_eval do
|
19
|
+
# Maybe this should be a unique top-level expression class?
|
20
|
+
def char
|
21
|
+
raise NoMethodError, 'CodepointList responds only to #chars'
|
22
|
+
end
|
23
|
+
|
24
|
+
def codepoint
|
25
|
+
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
26
|
+
end
|
27
|
+
|
28
|
+
def chars
|
29
|
+
codepoints.map { |cp| cp.chr('utf-8') }
|
30
|
+
end
|
31
|
+
|
32
|
+
def codepoints
|
33
|
+
text.scan(/\h+/).map(&:hex)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
AbstractMetaControlSequence.class_eval do
|
38
|
+
private
|
39
|
+
|
40
|
+
def control_sequence_to_s(control_sequence)
|
41
|
+
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
42
|
+
["000#{five_lsb}"].pack('B*')
|
43
|
+
end
|
44
|
+
|
45
|
+
def meta_char_to_codepoint(meta_char)
|
46
|
+
byte_value = meta_char.ord
|
47
|
+
byte_value < 128 ? byte_value + 128 : byte_value
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
Control.class_eval do
|
52
|
+
def codepoint
|
53
|
+
control_sequence_to_s(text).ord
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Meta.class_eval do
|
58
|
+
def codepoint
|
59
|
+
meta_char_to_codepoint(text[-1])
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
MetaControl.class_eval do
|
64
|
+
def codepoint
|
65
|
+
meta_char_to_codepoint(control_sequence_to_s(text))
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -63,16 +63,20 @@ class Regexp::MatchLength
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def to_re
|
66
|
-
|
66
|
+
/(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
70
70
|
|
71
71
|
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
if Regexp.method_defined?(:match?) # ruby >= 2.4
|
74
|
+
def test_regexp
|
75
|
+
@test_regexp ||= /^#{to_re}$/
|
76
|
+
end
|
77
|
+
else
|
78
|
+
def test_regexp
|
79
|
+
@test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module ReferencedExpressions
|
3
|
+
attr_accessor :referenced_expressions
|
4
|
+
|
5
|
+
def referenced_expression
|
6
|
+
referenced_expressions && referenced_expressions.first
|
7
|
+
end
|
8
|
+
|
9
|
+
def initialize_copy(orig)
|
10
|
+
exp_id = [self.class, self.starts_at]
|
11
|
+
|
12
|
+
# prevent infinite recursion for recursive subexp calls
|
13
|
+
copied = self.class.instance_eval { @copied_ref_exps ||= {} }
|
14
|
+
self.referenced_expressions =
|
15
|
+
if copied[exp_id]
|
16
|
+
orig.referenced_expressions
|
17
|
+
else
|
18
|
+
copied[exp_id] = true
|
19
|
+
orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
|
20
|
+
end
|
21
|
+
copied.clear
|
22
|
+
|
23
|
+
super
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
Base.include ReferencedExpressions
|
28
|
+
end
|