regexp_parser 2.9.2 → 2.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/regexp_parser/expression/classes/backreference.rb +1 -20
- data/lib/regexp_parser/expression/classes/conditional.rb +0 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +18 -90
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +5 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +68 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +28 -0
- data/lib/regexp_parser/expression.rb +3 -0
- data/lib/regexp_parser/parser.rb +8 -5
- data/lib/regexp_parser/scanner/scanner.rl +2 -2
- data/lib/regexp_parser/scanner.rb +725 -768
- data/lib/regexp_parser/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
|
4
|
+
data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
|
7
|
+
data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
|
@@ -1,25 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Backreference
|
3
|
-
class Base < Regexp::Expression::Base
|
4
|
-
attr_accessor :referenced_expression
|
5
|
-
|
6
|
-
def initialize_copy(orig)
|
7
|
-
exp_id = [self.class, self.starts_at]
|
8
|
-
|
9
|
-
# prevent infinite recursion for recursive subexp calls
|
10
|
-
copied = @@copied ||= {}
|
11
|
-
self.referenced_expression =
|
12
|
-
if copied[exp_id]
|
13
|
-
orig.referenced_expression
|
14
|
-
else
|
15
|
-
copied[exp_id] = true
|
16
|
-
orig.referenced_expression.dup
|
17
|
-
end
|
18
|
-
copied.clear
|
19
|
-
|
20
|
-
super
|
21
|
-
end
|
22
|
-
end
|
3
|
+
class Base < Regexp::Expression::Base; end
|
23
4
|
|
24
5
|
class Number < Backreference::Base
|
25
6
|
attr_reader :number
|
@@ -7,26 +7,17 @@ module Regexp::Expression
|
|
7
7
|
end
|
8
8
|
|
9
9
|
class Condition < Regexp::Expression::Base
|
10
|
-
attr_accessor :referenced_expression
|
11
|
-
|
12
10
|
# Name or number of the referenced capturing group that determines state.
|
13
11
|
# Returns a String if reference is by name, Integer if by number.
|
14
12
|
def reference
|
15
13
|
ref = text.tr("'<>()", "")
|
16
14
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
15
|
end
|
18
|
-
|
19
|
-
def initialize_copy(orig)
|
20
|
-
self.referenced_expression = orig.referenced_expression.dup
|
21
|
-
super
|
22
|
-
end
|
23
16
|
end
|
24
17
|
|
25
18
|
class Branch < Regexp::Expression::Sequence; end
|
26
19
|
|
27
20
|
class Expression < Regexp::Expression::Subexpression
|
28
|
-
attr_accessor :referenced_expression
|
29
|
-
|
30
21
|
def <<(exp)
|
31
22
|
expressions.last << exp
|
32
23
|
end
|
@@ -54,11 +45,6 @@ module Regexp::Expression
|
|
54
45
|
def reference
|
55
46
|
condition.reference
|
56
47
|
end
|
57
|
-
|
58
|
-
def initialize_copy(orig)
|
59
|
-
self.referenced_expression = orig.referenced_expression.dup
|
60
|
-
super
|
61
|
-
end
|
62
48
|
end
|
63
49
|
end
|
64
50
|
end
|
@@ -1,100 +1,28 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module EscapeSequence
|
3
|
-
|
4
|
-
def codepoint
|
5
|
-
char.ord
|
6
|
-
end
|
3
|
+
Base = Class.new(Regexp::Expression::Base)
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
5
|
+
AsciiEscape = Class.new(Base) # \e
|
6
|
+
Backspace = Class.new(Base) # \b
|
7
|
+
Bell = Class.new(Base) # \a
|
8
|
+
FormFeed = Class.new(Base) # \f
|
9
|
+
Newline = Class.new(Base) # \n
|
10
|
+
Return = Class.new(Base) # \r
|
11
|
+
Tab = Class.new(Base) # \t
|
12
|
+
VerticalTab = Class.new(Base) # \v
|
20
13
|
|
21
|
-
|
22
|
-
def char
|
23
|
-
text[1..-1]
|
24
|
-
end
|
25
|
-
end
|
14
|
+
Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
|
26
15
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
class FormFeed < EscapeSequence::Base; end
|
31
|
-
class Newline < EscapeSequence::Base; end
|
32
|
-
class Return < EscapeSequence::Base; end
|
33
|
-
class Tab < EscapeSequence::Base; end
|
34
|
-
class VerticalTab < EscapeSequence::Base; end
|
16
|
+
Octal = Class.new(Base) # e.g. \012
|
17
|
+
Hex = Class.new(Base) # e.g. \x0A
|
18
|
+
Codepoint = Class.new(Base) # e.g. \u000A
|
35
19
|
|
36
|
-
|
37
|
-
class Codepoint < EscapeSequence::Base; end
|
20
|
+
CodepointList = Class.new(Base) # e.g. \u{A B}
|
38
21
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
def codepoint
|
45
|
-
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
46
|
-
end
|
47
|
-
|
48
|
-
def chars
|
49
|
-
codepoints.map { |cp| cp.chr('utf-8') }
|
50
|
-
end
|
51
|
-
|
52
|
-
def codepoints
|
53
|
-
text.scan(/\h+/).map(&:hex)
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
class Octal < EscapeSequence::Base
|
58
|
-
def char
|
59
|
-
text[1..-1].to_i(8).chr('utf-8')
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
class AbstractMetaControlSequence < EscapeSequence::Base
|
64
|
-
def char
|
65
|
-
codepoint.chr('utf-8')
|
66
|
-
end
|
67
|
-
|
68
|
-
private
|
69
|
-
|
70
|
-
def control_sequence_to_s(control_sequence)
|
71
|
-
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
72
|
-
["000#{five_lsb}"].pack('B*')
|
73
|
-
end
|
74
|
-
|
75
|
-
def meta_char_to_codepoint(meta_char)
|
76
|
-
byte_value = meta_char.ord
|
77
|
-
byte_value < 128 ? byte_value + 128 : byte_value
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
class Control < AbstractMetaControlSequence
|
82
|
-
def codepoint
|
83
|
-
control_sequence_to_s(text).ord
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
class Meta < AbstractMetaControlSequence
|
88
|
-
def codepoint
|
89
|
-
meta_char_to_codepoint(text[-1])
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
class MetaControl < AbstractMetaControlSequence
|
94
|
-
def codepoint
|
95
|
-
meta_char_to_codepoint(control_sequence_to_s(text))
|
96
|
-
end
|
97
|
-
end
|
22
|
+
AbstractMetaControlSequence = Class.new(Base)
|
23
|
+
Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
|
24
|
+
Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
|
25
|
+
MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
|
98
26
|
end
|
99
27
|
|
100
28
|
# alias for symmetry between Token::* and Expression::*
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Regexp::Expression::EscapeSequence
|
2
|
+
AsciiEscape.class_eval { def codepoint; 0x1B end }
|
3
|
+
Backspace.class_eval { def codepoint; 0x8 end }
|
4
|
+
Bell.class_eval { def codepoint; 0x7 end }
|
5
|
+
FormFeed.class_eval { def codepoint; 0xC end }
|
6
|
+
Newline.class_eval { def codepoint; 0xA end }
|
7
|
+
Return.class_eval { def codepoint; 0xD end }
|
8
|
+
Tab.class_eval { def codepoint; 0x9 end }
|
9
|
+
VerticalTab.class_eval { def codepoint; 0xB end }
|
10
|
+
|
11
|
+
Literal.class_eval { def codepoint; text[1].ord end }
|
12
|
+
|
13
|
+
Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
|
14
|
+
|
15
|
+
Hex.class_eval { def codepoint; text[/\h+/].hex end }
|
16
|
+
Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
|
17
|
+
|
18
|
+
CodepointList.class_eval do
|
19
|
+
# Maybe this should be a unique top-level expression class?
|
20
|
+
def char
|
21
|
+
raise NoMethodError, 'CodepointList responds only to #chars'
|
22
|
+
end
|
23
|
+
|
24
|
+
def codepoint
|
25
|
+
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
26
|
+
end
|
27
|
+
|
28
|
+
def chars
|
29
|
+
codepoints.map { |cp| cp.chr('utf-8') }
|
30
|
+
end
|
31
|
+
|
32
|
+
def codepoints
|
33
|
+
text.scan(/\h+/).map(&:hex)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
AbstractMetaControlSequence.class_eval do
|
38
|
+
private
|
39
|
+
|
40
|
+
def control_sequence_to_s(control_sequence)
|
41
|
+
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
42
|
+
["000#{five_lsb}"].pack('B*')
|
43
|
+
end
|
44
|
+
|
45
|
+
def meta_char_to_codepoint(meta_char)
|
46
|
+
byte_value = meta_char.ord
|
47
|
+
byte_value < 128 ? byte_value + 128 : byte_value
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
Control.class_eval do
|
52
|
+
def codepoint
|
53
|
+
control_sequence_to_s(text).ord
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
Meta.class_eval do
|
58
|
+
def codepoint
|
59
|
+
meta_char_to_codepoint(text[-1])
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
MetaControl.class_eval do
|
64
|
+
def codepoint
|
65
|
+
meta_char_to_codepoint(control_sequence_to_s(text))
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module ReferencedExpressions
|
3
|
+
attr_accessor :referenced_expressions
|
4
|
+
|
5
|
+
def referenced_expression
|
6
|
+
referenced_expressions && referenced_expressions.first
|
7
|
+
end
|
8
|
+
|
9
|
+
def initialize_copy(orig)
|
10
|
+
exp_id = [self.class, self.starts_at]
|
11
|
+
|
12
|
+
# prevent infinite recursion for recursive subexp calls
|
13
|
+
copied = self.class.instance_eval { @copied_ref_exps ||= {} }
|
14
|
+
self.referenced_expressions =
|
15
|
+
if copied[exp_id]
|
16
|
+
orig.referenced_expressions
|
17
|
+
else
|
18
|
+
copied[exp_id] = true
|
19
|
+
orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
|
20
|
+
end
|
21
|
+
copied.clear
|
22
|
+
|
23
|
+
super
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
Base.include ReferencedExpressions
|
28
|
+
end
|
@@ -25,6 +25,8 @@ require_relative 'expression/classes/root'
|
|
25
25
|
require_relative 'expression/classes/unicode_property'
|
26
26
|
|
27
27
|
require_relative 'expression/methods/construct'
|
28
|
+
require_relative 'expression/methods/escape_sequence_char'
|
29
|
+
require_relative 'expression/methods/escape_sequence_codepoint'
|
28
30
|
require_relative 'expression/methods/human_name'
|
29
31
|
require_relative 'expression/methods/match'
|
30
32
|
require_relative 'expression/methods/match_length'
|
@@ -32,6 +34,7 @@ require_relative 'expression/methods/negative'
|
|
32
34
|
require_relative 'expression/methods/options'
|
33
35
|
require_relative 'expression/methods/parts'
|
34
36
|
require_relative 'expression/methods/printing'
|
37
|
+
require_relative 'expression/methods/referenced_expressions'
|
35
38
|
require_relative 'expression/methods/strfregexp'
|
36
39
|
require_relative 'expression/methods/tests'
|
37
40
|
require_relative 'expression/methods/traverse'
|
data/lib/regexp_parser/parser.rb
CHANGED
@@ -580,16 +580,19 @@ class Regexp::Parser
|
|
580
580
|
# the instance of Group::Capture that it refers to via its number.
|
581
581
|
def assign_referenced_expressions
|
582
582
|
# find all referenceable and referring expressions
|
583
|
-
targets = { 0 => root }
|
583
|
+
targets = { 0 => [root] }
|
584
584
|
referrers = []
|
585
585
|
root.each_expression do |exp|
|
586
|
-
exp.
|
587
|
-
|
586
|
+
if exp.referential?
|
587
|
+
referrers << exp
|
588
|
+
elsif exp.is_a?(Group::Capture)
|
589
|
+
(targets[exp.identifier] ||= []) << exp
|
590
|
+
end
|
588
591
|
end
|
589
|
-
# assign
|
592
|
+
# assign referenced expressions to referring expressions
|
590
593
|
# (in a second iteration because there might be forward references)
|
591
594
|
referrers.each do |exp|
|
592
|
-
exp.
|
595
|
+
exp.referenced_expressions = targets[exp.reference] ||
|
593
596
|
raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
|
594
597
|
end
|
595
598
|
end
|
@@ -78,8 +78,8 @@
|
|
78
78
|
# try to treat every other group head as options group, like Ruby
|
79
79
|
group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
|
80
80
|
|
81
|
-
group_name_id_ab = ([
|
82
|
-
group_name_id_sq = ([^0-9\-']
|
81
|
+
group_name_id_ab = ([^!=0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
|
82
|
+
group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
|
83
83
|
group_number = '-'? . [0-9]+;
|
84
84
|
group_level = [+\-] . [0-9]+;
|
85
85
|
|