regexp_parser 2.6.0 → 2.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +5 -5
- data/LICENSE +1 -1
- data/lib/regexp_parser/expression/base.rb +0 -7
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/backreference.rb +5 -10
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -7
- data/lib/regexp_parser/expression/classes/character_set.rb +4 -8
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -20
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +21 -91
- data/lib/regexp_parser/expression/classes/free_space.rb +3 -1
- data/lib/regexp_parser/expression/classes/group.rb +0 -22
- data/lib/regexp_parser/expression/classes/keep.rb +1 -1
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/unicode_property.rb +11 -11
- data/lib/regexp_parser/expression/methods/construct.rb +2 -4
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +8 -4
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
- data/lib/regexp_parser/expression/methods/tests.rb +40 -3
- data/lib/regexp_parser/expression/methods/traverse.rb +35 -19
- data/lib/regexp_parser/expression/quantifier.rb +30 -17
- data/lib/regexp_parser/expression/sequence.rb +5 -10
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +37 -20
- data/lib/regexp_parser/expression/subexpression.rb +20 -15
- data/lib/regexp_parser/expression.rb +37 -31
- data/lib/regexp_parser/lexer.rb +76 -36
- data/lib/regexp_parser/parser.rb +107 -103
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +29 -0
- data/lib/regexp_parser/scanner/properties/short.csv +3 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +101 -172
- data/lib/regexp_parser/scanner.rb +1171 -1365
- data/lib/regexp_parser/syntax/token/backreference.rb +3 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -0
- data/lib/regexp_parser/syntax/token/escape.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +9 -2
- data/lib/regexp_parser/syntax/token/unicode_property.rb +35 -1
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +13 -13
- data/lib/regexp_parser/syntax/version_lookup.rb +0 -8
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +6 -6
- data/regexp_parser.gemspec +5 -5
- metadata +17 -8
- data/CHANGELOG.md +0 -601
- data/README.md +0 -503
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
|
4
|
+
data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
|
7
|
+
data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
|
data/Gemfile
CHANGED
@@ -3,13 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
7
|
-
gem 'rake', '~> 13.
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
11
|
gem 'benchmark-ips', '~> 2.1'
|
12
|
-
gem 'gouteur'
|
13
|
-
gem 'rubocop', '~> 1.
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
14
|
end
|
15
15
|
end
|
data/LICENSE
CHANGED
@@ -6,13 +6,6 @@ module Regexp::Expression
|
|
6
6
|
init_from_token_and_options(token, options)
|
7
7
|
end
|
8
8
|
|
9
|
-
def initialize_copy(orig)
|
10
|
-
self.text = orig.text.dup if orig.text
|
11
|
-
self.options = orig.options.dup if orig.options
|
12
|
-
self.quantifier = orig.quantifier.clone if orig.quantifier
|
13
|
-
super
|
14
|
-
end
|
15
|
-
|
16
9
|
def to_re(format = :full)
|
17
10
|
if set_level > 0
|
18
11
|
warn "Calling #to_re on character set members is deprecated - "\
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -1,21 +1,13 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :backref, one way or the other, in v3.0.0
|
3
2
|
module Backreference
|
4
|
-
class Base < Regexp::Expression::Base
|
5
|
-
attr_accessor :referenced_expression
|
6
|
-
|
7
|
-
def initialize_copy(orig)
|
8
|
-
self.referenced_expression = orig.referenced_expression.dup
|
9
|
-
super
|
10
|
-
end
|
11
|
-
end
|
3
|
+
class Base < Regexp::Expression::Base; end
|
12
4
|
|
13
5
|
class Number < Backreference::Base
|
14
6
|
attr_reader :number
|
15
7
|
alias reference number
|
16
8
|
|
17
9
|
def initialize(token, options = {})
|
18
|
-
@number = token.text[
|
10
|
+
@number = token.text[/-?\d+/].to_i
|
19
11
|
super
|
20
12
|
end
|
21
13
|
end
|
@@ -58,4 +50,7 @@ module Regexp::Expression
|
|
58
50
|
end
|
59
51
|
end
|
60
52
|
end
|
53
|
+
|
54
|
+
# alias for symmetry between token symbol and Expression class name
|
55
|
+
Backref = Backreference
|
61
56
|
end
|
@@ -1,10 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
9
|
complete? and raise Regexp::Parser::Error,
|
@@ -15,10 +14,6 @@ module Regexp::Expression
|
|
15
14
|
def complete?
|
16
15
|
count == 2
|
17
16
|
end
|
18
|
-
|
19
|
-
def parts
|
20
|
-
intersperse(expressions, text.dup)
|
21
|
-
end
|
22
17
|
end
|
23
18
|
end
|
24
19
|
end
|
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
@@ -19,9 +16,8 @@ module Regexp::Expression
|
|
19
16
|
def close
|
20
17
|
self.closed = true
|
21
18
|
end
|
22
|
-
|
23
|
-
def parts
|
24
|
-
["#{text}#{'^' if negated?}", *expressions, ']']
|
25
|
-
end
|
26
19
|
end
|
20
|
+
|
21
|
+
# alias for symmetry between token symbol and Expression class name
|
22
|
+
Set = CharacterSet
|
27
23
|
end # module Regexp::Expression
|
@@ -7,33 +7,24 @@ module Regexp::Expression
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class Condition < Regexp::Expression::Base
|
10
|
-
attr_accessor :referenced_expression
|
11
|
-
|
12
10
|
# Name or number of the referenced capturing group that determines state.
|
13
11
|
# Returns a String if reference is by name, Integer if by number.
|
14
12
|
def reference
|
15
13
|
ref = text.tr("'<>()", "")
|
16
14
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
15
|
end
|
18
|
-
|
19
|
-
def initialize_copy(orig)
|
20
|
-
self.referenced_expression = orig.referenced_expression.dup
|
21
|
-
super
|
22
|
-
end
|
23
16
|
end
|
24
17
|
|
25
18
|
class Branch < Regexp::Expression::Sequence; end
|
26
19
|
|
27
20
|
class Expression < Regexp::Expression::Subexpression
|
28
|
-
attr_accessor :referenced_expression
|
29
|
-
|
30
21
|
def <<(exp)
|
31
22
|
expressions.last << exp
|
32
23
|
end
|
33
24
|
|
34
|
-
def add_sequence(active_opts = {})
|
25
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
35
26
|
raise TooManyBranches.new if branches.length == 2
|
36
|
-
params = { conditional_level: conditional_level + 1 }
|
27
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
37
28
|
Branch.add_to(self, params, active_opts)
|
38
29
|
end
|
39
30
|
alias :branch :add_sequence
|
@@ -54,15 +45,6 @@ module Regexp::Expression
|
|
54
45
|
def reference
|
55
46
|
condition.reference
|
56
47
|
end
|
57
|
-
|
58
|
-
def parts
|
59
|
-
[text.dup, condition, *intersperse(branches, '|'), ')']
|
60
|
-
end
|
61
|
-
|
62
|
-
def initialize_copy(orig)
|
63
|
-
self.referenced_expression = orig.referenced_expression.dup
|
64
|
-
super
|
65
|
-
end
|
66
48
|
end
|
67
49
|
end
|
68
50
|
end
|
@@ -1,100 +1,30 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
2
|
module EscapeSequence
|
4
|
-
|
5
|
-
def codepoint
|
6
|
-
char.ord
|
7
|
-
end
|
3
|
+
Base = Class.new(Regexp::Expression::Base)
|
8
4
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
5
|
+
AsciiEscape = Class.new(Base) # \e
|
6
|
+
Backspace = Class.new(Base) # \b
|
7
|
+
Bell = Class.new(Base) # \a
|
8
|
+
FormFeed = Class.new(Base) # \f
|
9
|
+
Newline = Class.new(Base) # \n
|
10
|
+
Return = Class.new(Base) # \r
|
11
|
+
Tab = Class.new(Base) # \t
|
12
|
+
VerticalTab = Class.new(Base) # \v
|
21
13
|
|
22
|
-
|
23
|
-
def char
|
24
|
-
text[1..-1]
|
25
|
-
end
|
26
|
-
end
|
14
|
+
Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
|
27
15
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
class FormFeed < EscapeSequence::Base; end
|
32
|
-
class Newline < EscapeSequence::Base; end
|
33
|
-
class Return < EscapeSequence::Base; end
|
34
|
-
class Tab < EscapeSequence::Base; end
|
35
|
-
class VerticalTab < EscapeSequence::Base; end
|
16
|
+
Octal = Class.new(Base) # e.g. \012
|
17
|
+
Hex = Class.new(Base) # e.g. \x0A
|
18
|
+
Codepoint = Class.new(Base) # e.g. \u000A
|
36
19
|
|
37
|
-
|
38
|
-
class Codepoint < EscapeSequence::Base; end
|
20
|
+
CodepointList = Class.new(Base) # e.g. \u{A B}
|
39
21
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
def codepoint
|
46
|
-
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
47
|
-
end
|
48
|
-
|
49
|
-
def chars
|
50
|
-
codepoints.map { |cp| cp.chr('utf-8') }
|
51
|
-
end
|
52
|
-
|
53
|
-
def codepoints
|
54
|
-
text.scan(/\h+/).map(&:hex)
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
class Octal < EscapeSequence::Base
|
59
|
-
def char
|
60
|
-
text[1..-1].to_i(8).chr('utf-8')
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
class AbstractMetaControlSequence < EscapeSequence::Base
|
65
|
-
def char
|
66
|
-
codepoint.chr('utf-8')
|
67
|
-
end
|
68
|
-
|
69
|
-
private
|
70
|
-
|
71
|
-
def control_sequence_to_s(control_sequence)
|
72
|
-
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
73
|
-
["000#{five_lsb}"].pack('B*')
|
74
|
-
end
|
75
|
-
|
76
|
-
def meta_char_to_codepoint(meta_char)
|
77
|
-
byte_value = meta_char.ord
|
78
|
-
byte_value < 128 ? byte_value + 128 : byte_value
|
79
|
-
end
|
80
|
-
end
|
81
|
-
|
82
|
-
class Control < AbstractMetaControlSequence
|
83
|
-
def codepoint
|
84
|
-
control_sequence_to_s(text).ord
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
class Meta < AbstractMetaControlSequence
|
89
|
-
def codepoint
|
90
|
-
meta_char_to_codepoint(text[-1])
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
class MetaControl < AbstractMetaControlSequence
|
95
|
-
def codepoint
|
96
|
-
meta_char_to_codepoint(control_sequence_to_s(text))
|
97
|
-
end
|
98
|
-
end
|
22
|
+
AbstractMetaControlSequence = Class.new(Base)
|
23
|
+
Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
|
24
|
+
Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
|
25
|
+
MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
|
99
26
|
end
|
27
|
+
|
28
|
+
# alias for symmetry between Token::* and Expression::*
|
29
|
+
Escape = EscapeSequence
|
100
30
|
end
|
@@ -5,10 +5,12 @@ module Regexp::Expression
|
|
5
5
|
end
|
6
6
|
end
|
7
7
|
|
8
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
9
10
|
|
10
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
11
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
12
14
|
text << exp.text
|
13
15
|
end
|
14
16
|
end
|
@@ -1,13 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
def parts
|
5
|
-
[text.dup, *expressions, ')']
|
6
|
-
end
|
7
|
-
|
8
|
-
def capturing?; false end
|
9
|
-
|
10
|
-
def comment?; false end
|
11
4
|
end
|
12
5
|
|
13
6
|
class Passive < Group::Base
|
@@ -18,14 +11,6 @@ module Regexp::Expression
|
|
18
11
|
super
|
19
12
|
end
|
20
13
|
|
21
|
-
def parts
|
22
|
-
if implicit?
|
23
|
-
expressions
|
24
|
-
else
|
25
|
-
super
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
14
|
def implicit?
|
30
15
|
@implicit
|
31
16
|
end
|
@@ -55,8 +40,6 @@ module Regexp::Expression
|
|
55
40
|
class Capture < Group::Base
|
56
41
|
attr_accessor :number, :number_at_level
|
57
42
|
alias identifier number
|
58
|
-
|
59
|
-
def capturing?; true end
|
60
43
|
end
|
61
44
|
|
62
45
|
class Named < Group::Capture
|
@@ -75,11 +58,6 @@ module Regexp::Expression
|
|
75
58
|
end
|
76
59
|
|
77
60
|
class Comment < Group::Base
|
78
|
-
def parts
|
79
|
-
[text.dup]
|
80
|
-
end
|
81
|
-
|
82
|
-
def comment?; true end
|
83
61
|
end
|
84
62
|
end
|
85
63
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Keep
|
3
|
-
#
|
3
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
4
4
|
# that contains all expressions to its left.
|
5
5
|
class Mark < Regexp::Expression::Base; end
|
6
6
|
end
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class PosixClass < Regexp::Expression::Base
|
3
|
-
def negative?
|
4
|
-
type == :nonposixclass
|
5
|
-
end
|
6
|
-
|
7
3
|
def name
|
8
|
-
|
4
|
+
text[/\w+/]
|
9
5
|
end
|
10
6
|
end
|
7
|
+
|
8
|
+
# alias for symmetry between token symbol and Expression class name
|
9
|
+
Posixclass = PosixClass
|
10
|
+
Nonposixclass = PosixClass
|
11
11
|
end
|
@@ -1,17 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# TODO: unify name with token :property, one way or the other, in v3.0.0
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
|
-
def negative?
|
6
|
-
type == :nonproperty
|
7
|
-
end
|
8
|
-
|
9
4
|
def name
|
10
5
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
6
|
end
|
12
7
|
|
13
8
|
def shortcut
|
14
|
-
|
9
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
|
@@ -110,10 +105,15 @@ module Regexp::Expression
|
|
110
105
|
class Unassigned < Codepoint::Base; end
|
111
106
|
end
|
112
107
|
|
113
|
-
class Age
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
117
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
118
114
|
end
|
115
|
+
|
116
|
+
# alias for symmetry between token symbol and Expression class name
|
117
|
+
Property = UnicodeProperty
|
118
|
+
Nonproperty = UnicodeProperty
|
119
119
|
end # module Regexp::Expression
|
@@ -25,11 +25,9 @@ module Regexp::Expression
|
|
25
25
|
def token_class
|
26
26
|
if self == Root || self < Sequence
|
27
27
|
nil # no token class because these objects are Parser-generated
|
28
|
-
# TODO: synch exp
|
29
|
-
elsif self ==
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
30
|
Regexp::Syntax::Token::Meta
|
31
|
-
elsif self <= EscapeSequence::Base
|
32
|
-
Regexp::Syntax::Token::Escape
|
33
31
|
else
|
34
32
|
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
35
33
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Regexp::Expression::EscapeSequence
|
2
|
+
AsciiEscape.class_eval { def codepoint; 0x1B end }
|
3
|
+
Backspace.class_eval { def codepoint; 0x8 end }
|
4
|
+
Bell.class_eval { def codepoint; 0x7 end }
|
5
|
+
FormFeed.class_eval { def codepoint; 0xC end }
|
6
|
+
Newline.class_eval { def codepoint; 0xA end }
|
7
|
+
Return.class_eval { def codepoint; 0xD end }
|
8
|
+
Tab.class_eval { def codepoint; 0x9 end }
|
9
|
+
VerticalTab.class_eval { def codepoint; 0xB end }
|
10
|
+
|
11
|
+
Literal.class_eval { def codepoint; text[1].ord end }
|
12
|
+
|
13
|
+
Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
|
14
|
+
|
15
|
+
Hex.class_eval { def codepoint; text[/\h+/].hex end }
|
16
|
+
Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
|
17
|
+
|
18
|
+
CodepointList.class_eval do
|
19
|
+
# Maybe this should be a unique top-level expression class?
|
20
|
+
def char
|
21
|
+
raise NoMethodError, 'CodepointList responds only to #chars'
|
22
|
+
end
|
23
|
+
|
24
|
+
def codepoint
|
25
|
+
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
26
|
+
end
|
27
|
+
|
28
|
+
def chars
|
29
|
+
codepoints.map { |cp| cp.chr('utf-8') }
|
30
|
+
end
|
31
|
+
|
32
|
+
def codepoints
|
33
|
+
text.scan(/\h+/).map(&:hex)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
AbstractMetaControlSequence.class_eval do
|
38
|
+
private
|
39
|
+
|
40
|
+
def control_sequence_to_s(control_sequence)
|
41
|
+
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
42
|
+
["000#{five_lsb}"].pack('B*')
|
43
|
+
end
|
44
|
+
|
45
|
+
def meta_char_to_codepoint(meta_char)
|
46
|
+
byte_value = meta_char.ord
|
47
|
+
byte_value < 128 ? byte_value + 128 : byte_value
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
Control.class_eval do
|
52
|
+
def codepoint
|
53
|
+
control_sequence_to_s(text).ord
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Meta.class_eval do
|
58
|
+
def codepoint
|
59
|
+
meta_char_to_codepoint(text[-1])
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
MetaControl.class_eval do
|
64
|
+
def codepoint
|
65
|
+
meta_char_to_codepoint(control_sequence_to_s(text))
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -63,16 +63,20 @@ class Regexp::MatchLength
|
|
63
63
|
end
|
64
64
|
|
65
65
|
def to_re
|
66
|
-
|
66
|
+
/(?:#{reify.call}){#{min_rep},#{max_rep unless max_rep == Float::INFINITY}}/
|
67
67
|
end
|
68
68
|
|
69
69
|
private
|
70
70
|
|
71
71
|
attr_accessor :base_min, :base_max, :min_rep, :max_rep, :exp_class, :reify
|
72
72
|
|
73
|
-
|
74
|
-
|
75
|
-
|
73
|
+
if Regexp.method_defined?(:match?) # ruby >= 2.4
|
74
|
+
def test_regexp
|
75
|
+
@test_regexp ||= /^#{to_re}$/
|
76
|
+
end
|
77
|
+
else
|
78
|
+
def test_regexp
|
79
|
+
@test_regexp ||= /^#{to_re}$/.tap { |r| def r.match?(s); !!match(s) end }
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def negative?
|
4
|
+
false
|
5
|
+
end
|
6
|
+
|
7
|
+
# not an alias so as to respect overrides of #negative?
|
8
|
+
def negated?
|
9
|
+
negative?
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
14
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
15
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
16
|
+
CharacterSet.class_eval { def negative?; negative end }
|
17
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
18
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
19
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
20
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
# default implementation
|
4
|
+
def parts
|
5
|
+
[text.dup]
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def intersperse(expressions, separator)
|
11
|
+
expressions.flat_map { |exp| [exp, separator] }.slice(0...-1)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
CharacterSet.class_eval { def parts; ["#{text}#{'^' if negated?}", *expressions, ']'] end }
|
16
|
+
CharacterSet::Range.class_eval { def parts; intersperse(expressions, text.dup) end }
|
17
|
+
Conditional::Expression.class_eval { def parts; [text.dup, condition, *intersperse(branches, '|'), ')'] end }
|
18
|
+
Group::Base.class_eval { def parts; [text.dup, *expressions, ')'] end }
|
19
|
+
Group::Passive.class_eval { def parts; implicit? ? expressions : super end }
|
20
|
+
Group::Comment.class_eval { def parts; [text.dup] end }
|
21
|
+
Subexpression.class_eval { def parts; expressions end }
|
22
|
+
SequenceOperation.class_eval { def parts; intersperse(expressions, text.dup) end }
|
23
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
def inspect
|
4
|
+
[
|
5
|
+
"#<#{self.class}",
|
6
|
+
pretty_print_instance_variables.map { |v| " #{v}=#{instance_variable_get(v).inspect}" },
|
7
|
+
">"
|
8
|
+
].join
|
9
|
+
end
|
10
|
+
|
11
|
+
# Make pretty-print work despite #inspect implementation.
|
12
|
+
def pretty_print(q)
|
13
|
+
q.pp_object(self)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Called by pretty_print (ruby/pp) and #inspect.
|
17
|
+
def pretty_print_instance_variables
|
18
|
+
[
|
19
|
+
(:@text unless text.to_s.empty?),
|
20
|
+
(:@quantifier if quantified?),
|
21
|
+
(:@options unless options.empty?),
|
22
|
+
(:@expressions unless terminal?),
|
23
|
+
].compact
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module ReferencedExpressions
|
3
|
+
attr_accessor :referenced_expressions
|
4
|
+
|
5
|
+
def referenced_expression
|
6
|
+
referenced_expressions && referenced_expressions.first
|
7
|
+
end
|
8
|
+
|
9
|
+
def initialize_copy(orig)
|
10
|
+
exp_id = [self.class, self.starts_at]
|
11
|
+
|
12
|
+
# prevent infinite recursion for recursive subexp calls
|
13
|
+
copied = self.class.instance_eval { @copied_ref_exps ||= {} }
|
14
|
+
self.referenced_expressions =
|
15
|
+
if copied[exp_id]
|
16
|
+
orig.referenced_expressions
|
17
|
+
else
|
18
|
+
copied[exp_id] = true
|
19
|
+
orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
|
20
|
+
end
|
21
|
+
copied.clear
|
22
|
+
|
23
|
+
super
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
Base.include ReferencedExpressions
|
28
|
+
end
|