regexp_parser 2.9.2 → 2.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
4
- data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
3
+ metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
4
+ data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
5
5
  SHA512:
6
- metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
7
- data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
6
+ metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
7
+ data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
@@ -1,25 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Backreference
3
- class Base < Regexp::Expression::Base
4
- attr_accessor :referenced_expression
5
-
6
- def initialize_copy(orig)
7
- exp_id = [self.class, self.starts_at]
8
-
9
- # prevent infinite recursion for recursive subexp calls
10
- copied = @@copied ||= {}
11
- self.referenced_expression =
12
- if copied[exp_id]
13
- orig.referenced_expression
14
- else
15
- copied[exp_id] = true
16
- orig.referenced_expression.dup
17
- end
18
- copied.clear
19
-
20
- super
21
- end
22
- end
3
+ class Base < Regexp::Expression::Base; end
23
4
 
24
5
  class Number < Backreference::Base
25
6
  attr_reader :number
@@ -7,26 +7,17 @@ module Regexp::Expression
7
7
  end
8
8
 
9
9
  class Condition < Regexp::Expression::Base
10
- attr_accessor :referenced_expression
11
-
12
10
  # Name or number of the referenced capturing group that determines state.
13
11
  # Returns a String if reference is by name, Integer if by number.
14
12
  def reference
15
13
  ref = text.tr("'<>()", "")
16
14
  ref =~ /\D/ ? ref : Integer(ref)
17
15
  end
18
-
19
- def initialize_copy(orig)
20
- self.referenced_expression = orig.referenced_expression.dup
21
- super
22
- end
23
16
  end
24
17
 
25
18
  class Branch < Regexp::Expression::Sequence; end
26
19
 
27
20
  class Expression < Regexp::Expression::Subexpression
28
- attr_accessor :referenced_expression
29
-
30
21
  def <<(exp)
31
22
  expressions.last << exp
32
23
  end
@@ -54,11 +45,6 @@ module Regexp::Expression
54
45
  def reference
55
46
  condition.reference
56
47
  end
57
-
58
- def initialize_copy(orig)
59
- self.referenced_expression = orig.referenced_expression.dup
60
- super
61
- end
62
48
  end
63
49
  end
64
50
  end
@@ -1,100 +1,28 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
- class Base < Regexp::Expression::Base
4
- def codepoint
5
- char.ord
6
- end
3
+ Base = Class.new(Regexp::Expression::Base)
7
4
 
8
- if ''.respond_to?(:undump)
9
- def char
10
- %("#{text}").undump
11
- end
12
- else
13
- # poor man's unescape without using eval
14
- require 'yaml'
15
- def char
16
- YAML.load(%Q(---\n"#{text}"\n))
17
- end
18
- end
19
- end
5
+ AsciiEscape = Class.new(Base) # \e
6
+ Backspace = Class.new(Base) # \b
7
+ Bell = Class.new(Base) # \a
8
+ FormFeed = Class.new(Base) # \f
9
+ Newline = Class.new(Base) # \n
10
+ Return = Class.new(Base) # \r
11
+ Tab = Class.new(Base) # \t
12
+ VerticalTab = Class.new(Base) # \v
20
13
 
21
- class Literal < EscapeSequence::Base
22
- def char
23
- text[1..-1]
24
- end
25
- end
14
+ Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
26
15
 
27
- class AsciiEscape < EscapeSequence::Base; end
28
- class Backspace < EscapeSequence::Base; end
29
- class Bell < EscapeSequence::Base; end
30
- class FormFeed < EscapeSequence::Base; end
31
- class Newline < EscapeSequence::Base; end
32
- class Return < EscapeSequence::Base; end
33
- class Tab < EscapeSequence::Base; end
34
- class VerticalTab < EscapeSequence::Base; end
16
+ Octal = Class.new(Base) # e.g. \012
17
+ Hex = Class.new(Base) # e.g. \x0A
18
+ Codepoint = Class.new(Base) # e.g. \u000A
35
19
 
36
- class Hex < EscapeSequence::Base; end
37
- class Codepoint < EscapeSequence::Base; end
20
+ CodepointList = Class.new(Base) # e.g. \u{A B}
38
21
 
39
- class CodepointList < EscapeSequence::Base
40
- def char
41
- raise NoMethodError, 'CodepointList responds only to #chars'
42
- end
43
-
44
- def codepoint
45
- raise NoMethodError, 'CodepointList responds only to #codepoints'
46
- end
47
-
48
- def chars
49
- codepoints.map { |cp| cp.chr('utf-8') }
50
- end
51
-
52
- def codepoints
53
- text.scan(/\h+/).map(&:hex)
54
- end
55
- end
56
-
57
- class Octal < EscapeSequence::Base
58
- def char
59
- text[1..-1].to_i(8).chr('utf-8')
60
- end
61
- end
62
-
63
- class AbstractMetaControlSequence < EscapeSequence::Base
64
- def char
65
- codepoint.chr('utf-8')
66
- end
67
-
68
- private
69
-
70
- def control_sequence_to_s(control_sequence)
71
- five_lsb = control_sequence.unpack('B*').first[-5..-1]
72
- ["000#{five_lsb}"].pack('B*')
73
- end
74
-
75
- def meta_char_to_codepoint(meta_char)
76
- byte_value = meta_char.ord
77
- byte_value < 128 ? byte_value + 128 : byte_value
78
- end
79
- end
80
-
81
- class Control < AbstractMetaControlSequence
82
- def codepoint
83
- control_sequence_to_s(text).ord
84
- end
85
- end
86
-
87
- class Meta < AbstractMetaControlSequence
88
- def codepoint
89
- meta_char_to_codepoint(text[-1])
90
- end
91
- end
92
-
93
- class MetaControl < AbstractMetaControlSequence
94
- def codepoint
95
- meta_char_to_codepoint(control_sequence_to_s(text))
96
- end
97
- end
22
+ AbstractMetaControlSequence = Class.new(Base)
23
+ Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
24
+ Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
25
+ MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
98
26
  end
99
27
 
100
28
  # alias for symmetry between Token::* and Expression::*
@@ -0,0 +1,5 @@
1
+ Regexp::Expression::EscapeSequence::Base.class_eval do
2
+ def char
3
+ codepoint.chr('utf-8')
4
+ end
5
+ end
@@ -0,0 +1,68 @@
1
+ module Regexp::Expression::EscapeSequence
2
+ AsciiEscape.class_eval { def codepoint; 0x1B end }
3
+ Backspace.class_eval { def codepoint; 0x8 end }
4
+ Bell.class_eval { def codepoint; 0x7 end }
5
+ FormFeed.class_eval { def codepoint; 0xC end }
6
+ Newline.class_eval { def codepoint; 0xA end }
7
+ Return.class_eval { def codepoint; 0xD end }
8
+ Tab.class_eval { def codepoint; 0x9 end }
9
+ VerticalTab.class_eval { def codepoint; 0xB end }
10
+
11
+ Literal.class_eval { def codepoint; text[1].ord end }
12
+
13
+ Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
14
+
15
+ Hex.class_eval { def codepoint; text[/\h+/].hex end }
16
+ Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
17
+
18
+ CodepointList.class_eval do
19
+ # Maybe this should be a unique top-level expression class?
20
+ def char
21
+ raise NoMethodError, 'CodepointList responds only to #chars'
22
+ end
23
+
24
+ def codepoint
25
+ raise NoMethodError, 'CodepointList responds only to #codepoints'
26
+ end
27
+
28
+ def chars
29
+ codepoints.map { |cp| cp.chr('utf-8') }
30
+ end
31
+
32
+ def codepoints
33
+ text.scan(/\h+/).map(&:hex)
34
+ end
35
+ end
36
+
37
+ AbstractMetaControlSequence.class_eval do
38
+ private
39
+
40
+ def control_sequence_to_s(control_sequence)
41
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
42
+ ["000#{five_lsb}"].pack('B*')
43
+ end
44
+
45
+ def meta_char_to_codepoint(meta_char)
46
+ byte_value = meta_char.ord
47
+ byte_value < 128 ? byte_value + 128 : byte_value
48
+ end
49
+ end
50
+
51
+ Control.class_eval do
52
+ def codepoint
53
+ control_sequence_to_s(text).ord
54
+ end
55
+ end
56
+
57
+ Meta.class_eval do
58
+ def codepoint
59
+ meta_char_to_codepoint(text[-1])
60
+ end
61
+ end
62
+
63
+ MetaControl.class_eval do
64
+ def codepoint
65
+ meta_char_to_codepoint(control_sequence_to_s(text))
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,28 @@
1
+ module Regexp::Expression
2
+ module ReferencedExpressions
3
+ attr_accessor :referenced_expressions
4
+
5
+ def referenced_expression
6
+ referenced_expressions && referenced_expressions.first
7
+ end
8
+
9
+ def initialize_copy(orig)
10
+ exp_id = [self.class, self.starts_at]
11
+
12
+ # prevent infinite recursion for recursive subexp calls
13
+ copied = self.class.instance_eval { @copied_ref_exps ||= {} }
14
+ self.referenced_expressions =
15
+ if copied[exp_id]
16
+ orig.referenced_expressions
17
+ else
18
+ copied[exp_id] = true
19
+ orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
20
+ end
21
+ copied.clear
22
+
23
+ super
24
+ end
25
+ end
26
+
27
+ Base.include ReferencedExpressions
28
+ end
@@ -25,6 +25,8 @@ require_relative 'expression/classes/root'
25
25
  require_relative 'expression/classes/unicode_property'
26
26
 
27
27
  require_relative 'expression/methods/construct'
28
+ require_relative 'expression/methods/escape_sequence_char'
29
+ require_relative 'expression/methods/escape_sequence_codepoint'
28
30
  require_relative 'expression/methods/human_name'
29
31
  require_relative 'expression/methods/match'
30
32
  require_relative 'expression/methods/match_length'
@@ -32,6 +34,7 @@ require_relative 'expression/methods/negative'
32
34
  require_relative 'expression/methods/options'
33
35
  require_relative 'expression/methods/parts'
34
36
  require_relative 'expression/methods/printing'
37
+ require_relative 'expression/methods/referenced_expressions'
35
38
  require_relative 'expression/methods/strfregexp'
36
39
  require_relative 'expression/methods/tests'
37
40
  require_relative 'expression/methods/traverse'
@@ -580,16 +580,19 @@ class Regexp::Parser
580
580
  # the instance of Group::Capture that it refers to via its number.
581
581
  def assign_referenced_expressions
582
582
  # find all referenceable and referring expressions
583
- targets = { 0 => root }
583
+ targets = { 0 => [root] }
584
584
  referrers = []
585
585
  root.each_expression do |exp|
586
- exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
- referrers << exp if exp.referential?
586
+ if exp.referential?
587
+ referrers << exp
588
+ elsif exp.is_a?(Group::Capture)
589
+ (targets[exp.identifier] ||= []) << exp
590
+ end
588
591
  end
589
- # assign reference expression to referring expressions
592
+ # assign referenced expressions to referring expressions
590
593
  # (in a second iteration because there might be forward references)
591
594
  referrers.each do |exp|
592
- exp.referenced_expression = targets[exp.reference] ||
595
+ exp.referenced_expressions = targets[exp.reference] ||
593
596
  raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
594
597
  end
595
598
  end
@@ -78,8 +78,8 @@
78
78
  # try to treat every other group head as options group, like Ruby
79
79
  group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
80
80
 
81
- group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
82
- group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
81
+ group_name_id_ab = ([^!=0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
82
+ group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
83
83
  group_number = '-'? . [0-9]+;
84
84
  group_level = [+\-] . [0-9]+;
85
85