regexp_parser 2.9.2 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c88d5bc178e9bf95a8a008d9d5e9d8cf1b4a8bb0d65310901a995daa448a28f4
4
- data.tar.gz: 47c1ed4782981f5cc2a0bb7bd8f402e360cd60ebeba33615df0c94dd3842b48c
3
+ metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
4
+ data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
5
5
  SHA512:
6
- metadata.gz: 5dc1bf229c259b762ea38f459f70a9a04e5ee08207fbae04bdf9045f9f2b1c0f0b6a716a3e08fda55ca0b769ef55f480f7f0e19f3412175fdc7a475362889ab3
7
- data.tar.gz: 5de692c1cce8f2436936752d0cf6c5ea51d84bb9c63110dcc49621a476b47800300911952f4d4a687c81f151886bc5570b14af559d74b5196b63e13c684ab7c5
6
+ metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
7
+ data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
@@ -1,25 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Backreference
3
- class Base < Regexp::Expression::Base
4
- attr_accessor :referenced_expression
5
-
6
- def initialize_copy(orig)
7
- exp_id = [self.class, self.starts_at]
8
-
9
- # prevent infinite recursion for recursive subexp calls
10
- copied = @@copied ||= {}
11
- self.referenced_expression =
12
- if copied[exp_id]
13
- orig.referenced_expression
14
- else
15
- copied[exp_id] = true
16
- orig.referenced_expression.dup
17
- end
18
- copied.clear
19
-
20
- super
21
- end
22
- end
3
+ class Base < Regexp::Expression::Base; end
23
4
 
24
5
  class Number < Backreference::Base
25
6
  attr_reader :number
@@ -7,26 +7,17 @@ module Regexp::Expression
7
7
  end
8
8
 
9
9
  class Condition < Regexp::Expression::Base
10
- attr_accessor :referenced_expression
11
-
12
10
  # Name or number of the referenced capturing group that determines state.
13
11
  # Returns a String if reference is by name, Integer if by number.
14
12
  def reference
15
13
  ref = text.tr("'<>()", "")
16
14
  ref =~ /\D/ ? ref : Integer(ref)
17
15
  end
18
-
19
- def initialize_copy(orig)
20
- self.referenced_expression = orig.referenced_expression.dup
21
- super
22
- end
23
16
  end
24
17
 
25
18
  class Branch < Regexp::Expression::Sequence; end
26
19
 
27
20
  class Expression < Regexp::Expression::Subexpression
28
- attr_accessor :referenced_expression
29
-
30
21
  def <<(exp)
31
22
  expressions.last << exp
32
23
  end
@@ -54,11 +45,6 @@ module Regexp::Expression
54
45
  def reference
55
46
  condition.reference
56
47
  end
57
-
58
- def initialize_copy(orig)
59
- self.referenced_expression = orig.referenced_expression.dup
60
- super
61
- end
62
48
  end
63
49
  end
64
50
  end
@@ -1,100 +1,28 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
- class Base < Regexp::Expression::Base
4
- def codepoint
5
- char.ord
6
- end
3
+ Base = Class.new(Regexp::Expression::Base)
7
4
 
8
- if ''.respond_to?(:undump)
9
- def char
10
- %("#{text}").undump
11
- end
12
- else
13
- # poor man's unescape without using eval
14
- require 'yaml'
15
- def char
16
- YAML.load(%Q(---\n"#{text}"\n))
17
- end
18
- end
19
- end
5
+ AsciiEscape = Class.new(Base) # \e
6
+ Backspace = Class.new(Base) # \b
7
+ Bell = Class.new(Base) # \a
8
+ FormFeed = Class.new(Base) # \f
9
+ Newline = Class.new(Base) # \n
10
+ Return = Class.new(Base) # \r
11
+ Tab = Class.new(Base) # \t
12
+ VerticalTab = Class.new(Base) # \v
20
13
 
21
- class Literal < EscapeSequence::Base
22
- def char
23
- text[1..-1]
24
- end
25
- end
14
+ Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
26
15
 
27
- class AsciiEscape < EscapeSequence::Base; end
28
- class Backspace < EscapeSequence::Base; end
29
- class Bell < EscapeSequence::Base; end
30
- class FormFeed < EscapeSequence::Base; end
31
- class Newline < EscapeSequence::Base; end
32
- class Return < EscapeSequence::Base; end
33
- class Tab < EscapeSequence::Base; end
34
- class VerticalTab < EscapeSequence::Base; end
16
+ Octal = Class.new(Base) # e.g. \012
17
+ Hex = Class.new(Base) # e.g. \x0A
18
+ Codepoint = Class.new(Base) # e.g. \u000A
35
19
 
36
- class Hex < EscapeSequence::Base; end
37
- class Codepoint < EscapeSequence::Base; end
20
+ CodepointList = Class.new(Base) # e.g. \u{A B}
38
21
 
39
- class CodepointList < EscapeSequence::Base
40
- def char
41
- raise NoMethodError, 'CodepointList responds only to #chars'
42
- end
43
-
44
- def codepoint
45
- raise NoMethodError, 'CodepointList responds only to #codepoints'
46
- end
47
-
48
- def chars
49
- codepoints.map { |cp| cp.chr('utf-8') }
50
- end
51
-
52
- def codepoints
53
- text.scan(/\h+/).map(&:hex)
54
- end
55
- end
56
-
57
- class Octal < EscapeSequence::Base
58
- def char
59
- text[1..-1].to_i(8).chr('utf-8')
60
- end
61
- end
62
-
63
- class AbstractMetaControlSequence < EscapeSequence::Base
64
- def char
65
- codepoint.chr('utf-8')
66
- end
67
-
68
- private
69
-
70
- def control_sequence_to_s(control_sequence)
71
- five_lsb = control_sequence.unpack('B*').first[-5..-1]
72
- ["000#{five_lsb}"].pack('B*')
73
- end
74
-
75
- def meta_char_to_codepoint(meta_char)
76
- byte_value = meta_char.ord
77
- byte_value < 128 ? byte_value + 128 : byte_value
78
- end
79
- end
80
-
81
- class Control < AbstractMetaControlSequence
82
- def codepoint
83
- control_sequence_to_s(text).ord
84
- end
85
- end
86
-
87
- class Meta < AbstractMetaControlSequence
88
- def codepoint
89
- meta_char_to_codepoint(text[-1])
90
- end
91
- end
92
-
93
- class MetaControl < AbstractMetaControlSequence
94
- def codepoint
95
- meta_char_to_codepoint(control_sequence_to_s(text))
96
- end
97
- end
22
+ AbstractMetaControlSequence = Class.new(Base)
23
+ Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
24
+ Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
25
+ MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
98
26
  end
99
27
 
100
28
  # alias for symmetry between Token::* and Expression::*
@@ -0,0 +1,5 @@
1
+ Regexp::Expression::EscapeSequence::Base.class_eval do
2
+ def char
3
+ codepoint.chr('utf-8')
4
+ end
5
+ end
@@ -0,0 +1,68 @@
1
+ module Regexp::Expression::EscapeSequence
2
+ AsciiEscape.class_eval { def codepoint; 0x1B end }
3
+ Backspace.class_eval { def codepoint; 0x8 end }
4
+ Bell.class_eval { def codepoint; 0x7 end }
5
+ FormFeed.class_eval { def codepoint; 0xC end }
6
+ Newline.class_eval { def codepoint; 0xA end }
7
+ Return.class_eval { def codepoint; 0xD end }
8
+ Tab.class_eval { def codepoint; 0x9 end }
9
+ VerticalTab.class_eval { def codepoint; 0xB end }
10
+
11
+ Literal.class_eval { def codepoint; text[1].ord end }
12
+
13
+ Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
14
+
15
+ Hex.class_eval { def codepoint; text[/\h+/].hex end }
16
+ Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
17
+
18
+ CodepointList.class_eval do
19
+ # Maybe this should be a unique top-level expression class?
20
+ def char
21
+ raise NoMethodError, 'CodepointList responds only to #chars'
22
+ end
23
+
24
+ def codepoint
25
+ raise NoMethodError, 'CodepointList responds only to #codepoints'
26
+ end
27
+
28
+ def chars
29
+ codepoints.map { |cp| cp.chr('utf-8') }
30
+ end
31
+
32
+ def codepoints
33
+ text.scan(/\h+/).map(&:hex)
34
+ end
35
+ end
36
+
37
+ AbstractMetaControlSequence.class_eval do
38
+ private
39
+
40
+ def control_sequence_to_s(control_sequence)
41
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
42
+ ["000#{five_lsb}"].pack('B*')
43
+ end
44
+
45
+ def meta_char_to_codepoint(meta_char)
46
+ byte_value = meta_char.ord
47
+ byte_value < 128 ? byte_value + 128 : byte_value
48
+ end
49
+ end
50
+
51
+ Control.class_eval do
52
+ def codepoint
53
+ control_sequence_to_s(text).ord
54
+ end
55
+ end
56
+
57
+ Meta.class_eval do
58
+ def codepoint
59
+ meta_char_to_codepoint(text[-1])
60
+ end
61
+ end
62
+
63
+ MetaControl.class_eval do
64
+ def codepoint
65
+ meta_char_to_codepoint(control_sequence_to_s(text))
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,28 @@
1
+ module Regexp::Expression
2
+ module ReferencedExpressions
3
+ attr_accessor :referenced_expressions
4
+
5
+ def referenced_expression
6
+ referenced_expressions && referenced_expressions.first
7
+ end
8
+
9
+ def initialize_copy(orig)
10
+ exp_id = [self.class, self.starts_at]
11
+
12
+ # prevent infinite recursion for recursive subexp calls
13
+ copied = self.class.instance_eval { @copied_ref_exps ||= {} }
14
+ self.referenced_expressions =
15
+ if copied[exp_id]
16
+ orig.referenced_expressions
17
+ else
18
+ copied[exp_id] = true
19
+ orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
20
+ end
21
+ copied.clear
22
+
23
+ super
24
+ end
25
+ end
26
+
27
+ Base.include ReferencedExpressions
28
+ end
@@ -25,6 +25,8 @@ require_relative 'expression/classes/root'
25
25
  require_relative 'expression/classes/unicode_property'
26
26
 
27
27
  require_relative 'expression/methods/construct'
28
+ require_relative 'expression/methods/escape_sequence_char'
29
+ require_relative 'expression/methods/escape_sequence_codepoint'
28
30
  require_relative 'expression/methods/human_name'
29
31
  require_relative 'expression/methods/match'
30
32
  require_relative 'expression/methods/match_length'
@@ -32,6 +34,7 @@ require_relative 'expression/methods/negative'
32
34
  require_relative 'expression/methods/options'
33
35
  require_relative 'expression/methods/parts'
34
36
  require_relative 'expression/methods/printing'
37
+ require_relative 'expression/methods/referenced_expressions'
35
38
  require_relative 'expression/methods/strfregexp'
36
39
  require_relative 'expression/methods/tests'
37
40
  require_relative 'expression/methods/traverse'
@@ -580,16 +580,19 @@ class Regexp::Parser
580
580
  # the instance of Group::Capture that it refers to via its number.
581
581
  def assign_referenced_expressions
582
582
  # find all referenceable and referring expressions
583
- targets = { 0 => root }
583
+ targets = { 0 => [root] }
584
584
  referrers = []
585
585
  root.each_expression do |exp|
586
- exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
- referrers << exp if exp.referential?
586
+ if exp.referential?
587
+ referrers << exp
588
+ elsif exp.is_a?(Group::Capture)
589
+ (targets[exp.identifier] ||= []) << exp
590
+ end
588
591
  end
589
- # assign reference expression to referring expressions
592
+ # assign referenced expressions to referring expressions
590
593
  # (in a second iteration because there might be forward references)
591
594
  referrers.each do |exp|
592
- exp.referenced_expression = targets[exp.reference] ||
595
+ exp.referenced_expressions = targets[exp.reference] ||
593
596
  raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
594
597
  end
595
598
  end
@@ -78,8 +78,8 @@
78
78
  # try to treat every other group head as options group, like Ruby
79
79
  group_options = '?' . ( [^!#'():<=>~]+ . ':'? ) ?;
80
80
 
81
- group_name_id_ab = ([^!0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
82
- group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
81
+ group_name_id_ab = ([^!=0-9\->] | utf8_multibyte) . ([^>] | utf8_multibyte)*;
82
+ group_name_id_sq = ([^0-9\-'] | utf8_multibyte) . ([^'] | utf8_multibyte)*;
83
83
  group_number = '-'? . [0-9]+;
84
84
  group_level = [+\-] . [0-9]+;
85
85