regexp_parser 2.9.3 → 2.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e429c2cc03b2c9c31e3bf5c3dc71ffc15c5032a35f52c3abae9134d02c45496f
4
- data.tar.gz: 6a89f8618748c8ab479c4d81ff44c9fabfb461337993fcc641da23d6c349a1ec
3
+ metadata.gz: f6ed5457d89738fa1076cf3875cd2d009973f02857ea68e055ef3ef74a78dc91
4
+ data.tar.gz: d67eb5f0cb37ad106574b2ae327eefcfc13c9d585cddec6661898f4d8166ebcc
5
5
  SHA512:
6
- metadata.gz: 8f9cb8133b24db6f8bb2199356101c234960839ae1251a77da6fe4faeaafc2ab1d6f679f5a6e081860d4a9137a91aeb7793dbc617f04c9747b9110d64134d45f
7
- data.tar.gz: bdfe1c9a13fef4f891c28787588c92be37c1c8a61e0d473d05482a67207675be090ec5a74d12b46cf93c8d2565388c6680acd89ad65bc63454514ee720181c82
6
+ metadata.gz: 6b8adbc3c4707fc4c823456ae1d7547f17568802de03008a17fef18a5f95af08b0e42d48ccdfab25a740603a58ab89c036d70cec94405701201e5a5af51ce392
7
+ data.tar.gz: 9bea98a42ab64a9b45ddc5564cd077d7eb6d2ddc293844759bb8001aa9fefd8aa26b0e03fff7a286ccde9f7aeacacda9fbb187fe04082749d3c2605e0cece7b9
@@ -1,25 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Backreference
3
- class Base < Regexp::Expression::Base
4
- attr_accessor :referenced_expression
5
-
6
- def initialize_copy(orig)
7
- exp_id = [self.class, self.starts_at]
8
-
9
- # prevent infinite recursion for recursive subexp calls
10
- copied = @@copied ||= {}
11
- self.referenced_expression =
12
- if copied[exp_id]
13
- orig.referenced_expression
14
- else
15
- copied[exp_id] = true
16
- orig.referenced_expression.dup
17
- end
18
- copied.clear
19
-
20
- super
21
- end
22
- end
3
+ class Base < Regexp::Expression::Base; end
23
4
 
24
5
  class Number < Backreference::Base
25
6
  attr_reader :number
@@ -7,26 +7,17 @@ module Regexp::Expression
7
7
  end
8
8
 
9
9
  class Condition < Regexp::Expression::Base
10
- attr_accessor :referenced_expression
11
-
12
10
  # Name or number of the referenced capturing group that determines state.
13
11
  # Returns a String if reference is by name, Integer if by number.
14
12
  def reference
15
13
  ref = text.tr("'<>()", "")
16
14
  ref =~ /\D/ ? ref : Integer(ref)
17
15
  end
18
-
19
- def initialize_copy(orig)
20
- self.referenced_expression = orig.referenced_expression.dup
21
- super
22
- end
23
16
  end
24
17
 
25
18
  class Branch < Regexp::Expression::Sequence; end
26
19
 
27
20
  class Expression < Regexp::Expression::Subexpression
28
- attr_accessor :referenced_expression
29
-
30
21
  def <<(exp)
31
22
  expressions.last << exp
32
23
  end
@@ -54,11 +45,6 @@ module Regexp::Expression
54
45
  def reference
55
46
  condition.reference
56
47
  end
57
-
58
- def initialize_copy(orig)
59
- self.referenced_expression = orig.referenced_expression.dup
60
- super
61
- end
62
48
  end
63
49
  end
64
50
  end
@@ -1,100 +1,28 @@
1
1
  module Regexp::Expression
2
2
  module EscapeSequence
3
- class Base < Regexp::Expression::Base
4
- def codepoint
5
- char.ord
6
- end
3
+ Base = Class.new(Regexp::Expression::Base)
7
4
 
8
- if ''.respond_to?(:undump)
9
- def char
10
- %("#{text}").undump
11
- end
12
- else
13
- # poor man's unescape without using eval
14
- require 'yaml'
15
- def char
16
- YAML.load(%Q(---\n"#{text}"\n))
17
- end
18
- end
19
- end
5
+ AsciiEscape = Class.new(Base) # \e
6
+ Backspace = Class.new(Base) # \b
7
+ Bell = Class.new(Base) # \a
8
+ FormFeed = Class.new(Base) # \f
9
+ Newline = Class.new(Base) # \n
10
+ Return = Class.new(Base) # \r
11
+ Tab = Class.new(Base) # \t
12
+ VerticalTab = Class.new(Base) # \v
20
13
 
21
- class Literal < EscapeSequence::Base
22
- def char
23
- text[1..-1]
24
- end
25
- end
14
+ Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
26
15
 
27
- class AsciiEscape < EscapeSequence::Base; end
28
- class Backspace < EscapeSequence::Base; end
29
- class Bell < EscapeSequence::Base; end
30
- class FormFeed < EscapeSequence::Base; end
31
- class Newline < EscapeSequence::Base; end
32
- class Return < EscapeSequence::Base; end
33
- class Tab < EscapeSequence::Base; end
34
- class VerticalTab < EscapeSequence::Base; end
16
+ Octal = Class.new(Base) # e.g. \012
17
+ Hex = Class.new(Base) # e.g. \x0A
18
+ Codepoint = Class.new(Base) # e.g. \u000A
35
19
 
36
- class Hex < EscapeSequence::Base; end
37
- class Codepoint < EscapeSequence::Base; end
20
+ CodepointList = Class.new(Base) # e.g. \u{A B}
38
21
 
39
- class CodepointList < EscapeSequence::Base
40
- def char
41
- raise NoMethodError, 'CodepointList responds only to #chars'
42
- end
43
-
44
- def codepoint
45
- raise NoMethodError, 'CodepointList responds only to #codepoints'
46
- end
47
-
48
- def chars
49
- codepoints.map { |cp| cp.chr('utf-8') }
50
- end
51
-
52
- def codepoints
53
- text.scan(/\h+/).map(&:hex)
54
- end
55
- end
56
-
57
- class Octal < EscapeSequence::Base
58
- def char
59
- text[1..-1].to_i(8).chr('utf-8')
60
- end
61
- end
62
-
63
- class AbstractMetaControlSequence < EscapeSequence::Base
64
- def char
65
- codepoint.chr('utf-8')
66
- end
67
-
68
- private
69
-
70
- def control_sequence_to_s(control_sequence)
71
- five_lsb = control_sequence.unpack('B*').first[-5..-1]
72
- ["000#{five_lsb}"].pack('B*')
73
- end
74
-
75
- def meta_char_to_codepoint(meta_char)
76
- byte_value = meta_char.ord
77
- byte_value < 128 ? byte_value + 128 : byte_value
78
- end
79
- end
80
-
81
- class Control < AbstractMetaControlSequence
82
- def codepoint
83
- control_sequence_to_s(text).ord
84
- end
85
- end
86
-
87
- class Meta < AbstractMetaControlSequence
88
- def codepoint
89
- meta_char_to_codepoint(text[-1])
90
- end
91
- end
92
-
93
- class MetaControl < AbstractMetaControlSequence
94
- def codepoint
95
- meta_char_to_codepoint(control_sequence_to_s(text))
96
- end
97
- end
22
+ AbstractMetaControlSequence = Class.new(Base)
23
+ Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
24
+ Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
25
+ MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
98
26
  end
99
27
 
100
28
  # alias for symmetry between Token::* and Expression::*
@@ -0,0 +1,5 @@
1
+ Regexp::Expression::EscapeSequence::Base.class_eval do
2
+ def char
3
+ codepoint.chr('utf-8')
4
+ end
5
+ end
@@ -0,0 +1,68 @@
1
+ module Regexp::Expression::EscapeSequence
2
+ AsciiEscape.class_eval { def codepoint; 0x1B end }
3
+ Backspace.class_eval { def codepoint; 0x8 end }
4
+ Bell.class_eval { def codepoint; 0x7 end }
5
+ FormFeed.class_eval { def codepoint; 0xC end }
6
+ Newline.class_eval { def codepoint; 0xA end }
7
+ Return.class_eval { def codepoint; 0xD end }
8
+ Tab.class_eval { def codepoint; 0x9 end }
9
+ VerticalTab.class_eval { def codepoint; 0xB end }
10
+
11
+ Literal.class_eval { def codepoint; text[1].ord end }
12
+
13
+ Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
14
+
15
+ Hex.class_eval { def codepoint; text[/\h+/].hex end }
16
+ Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
17
+
18
+ CodepointList.class_eval do
19
+ # Maybe this should be a unique top-level expression class?
20
+ def char
21
+ raise NoMethodError, 'CodepointList responds only to #chars'
22
+ end
23
+
24
+ def codepoint
25
+ raise NoMethodError, 'CodepointList responds only to #codepoints'
26
+ end
27
+
28
+ def chars
29
+ codepoints.map { |cp| cp.chr('utf-8') }
30
+ end
31
+
32
+ def codepoints
33
+ text.scan(/\h+/).map(&:hex)
34
+ end
35
+ end
36
+
37
+ AbstractMetaControlSequence.class_eval do
38
+ private
39
+
40
+ def control_sequence_to_s(control_sequence)
41
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
42
+ ["000#{five_lsb}"].pack('B*')
43
+ end
44
+
45
+ def meta_char_to_codepoint(meta_char)
46
+ byte_value = meta_char.ord
47
+ byte_value < 128 ? byte_value + 128 : byte_value
48
+ end
49
+ end
50
+
51
+ Control.class_eval do
52
+ def codepoint
53
+ control_sequence_to_s(text).ord
54
+ end
55
+ end
56
+
57
+ Meta.class_eval do
58
+ def codepoint
59
+ meta_char_to_codepoint(text[-1])
60
+ end
61
+ end
62
+
63
+ MetaControl.class_eval do
64
+ def codepoint
65
+ meta_char_to_codepoint(control_sequence_to_s(text))
66
+ end
67
+ end
68
+ end
@@ -0,0 +1,28 @@
1
+ module Regexp::Expression
2
+ module ReferencedExpressions
3
+ attr_accessor :referenced_expressions
4
+
5
+ def referenced_expression
6
+ referenced_expressions && referenced_expressions.first
7
+ end
8
+
9
+ def initialize_copy(orig)
10
+ exp_id = [self.class, self.starts_at]
11
+
12
+ # prevent infinite recursion for recursive subexp calls
13
+ copied = self.class.instance_eval { @copied_ref_exps ||= {} }
14
+ self.referenced_expressions =
15
+ if copied[exp_id]
16
+ orig.referenced_expressions
17
+ else
18
+ copied[exp_id] = true
19
+ orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
20
+ end
21
+ copied.clear
22
+
23
+ super
24
+ end
25
+ end
26
+
27
+ Base.include ReferencedExpressions
28
+ end
@@ -25,6 +25,8 @@ require_relative 'expression/classes/root'
25
25
  require_relative 'expression/classes/unicode_property'
26
26
 
27
27
  require_relative 'expression/methods/construct'
28
+ require_relative 'expression/methods/escape_sequence_char'
29
+ require_relative 'expression/methods/escape_sequence_codepoint'
28
30
  require_relative 'expression/methods/human_name'
29
31
  require_relative 'expression/methods/match'
30
32
  require_relative 'expression/methods/match_length'
@@ -32,6 +34,7 @@ require_relative 'expression/methods/negative'
32
34
  require_relative 'expression/methods/options'
33
35
  require_relative 'expression/methods/parts'
34
36
  require_relative 'expression/methods/printing'
37
+ require_relative 'expression/methods/referenced_expressions'
35
38
  require_relative 'expression/methods/strfregexp'
36
39
  require_relative 'expression/methods/tests'
37
40
  require_relative 'expression/methods/traverse'
@@ -580,16 +580,19 @@ class Regexp::Parser
580
580
  # the instance of Group::Capture that it refers to via its number.
581
581
  def assign_referenced_expressions
582
582
  # find all referenceable and referring expressions
583
- targets = { 0 => root }
583
+ targets = { 0 => [root] }
584
584
  referrers = []
585
585
  root.each_expression do |exp|
586
- exp.is_a?(Group::Capture) && targets[exp.identifier] = exp
587
- referrers << exp if exp.referential?
586
+ if exp.referential?
587
+ referrers << exp
588
+ elsif exp.is_a?(Group::Capture)
589
+ (targets[exp.identifier] ||= []) << exp
590
+ end
588
591
  end
589
- # assign reference expression to referring expressions
592
+ # assign referenced expressions to referring expressions
590
593
  # (in a second iteration because there might be forward references)
591
594
  referrers.each do |exp|
592
- exp.referenced_expression = targets[exp.reference] ||
595
+ exp.referenced_expressions = targets[exp.reference] ||
593
596
  raise(ParserError, "Invalid reference #{exp.reference} at pos #{exp.ts}")
594
597
  end
595
598
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '2.9.3'
3
+ VERSION = '2.10.0'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.9.3
4
+ version: 2.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2024-11-29 00:00:00.000000000 Z
12
+ date: 2024-12-25 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
15
15
  email:
@@ -43,6 +43,8 @@ files:
43
43
  - lib/regexp_parser/expression/classes/root.rb
44
44
  - lib/regexp_parser/expression/classes/unicode_property.rb
45
45
  - lib/regexp_parser/expression/methods/construct.rb
46
+ - lib/regexp_parser/expression/methods/escape_sequence_char.rb
47
+ - lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb
46
48
  - lib/regexp_parser/expression/methods/human_name.rb
47
49
  - lib/regexp_parser/expression/methods/match.rb
48
50
  - lib/regexp_parser/expression/methods/match_length.rb
@@ -50,6 +52,7 @@ files:
50
52
  - lib/regexp_parser/expression/methods/options.rb
51
53
  - lib/regexp_parser/expression/methods/parts.rb
52
54
  - lib/regexp_parser/expression/methods/printing.rb
55
+ - lib/regexp_parser/expression/methods/referenced_expressions.rb
53
56
  - lib/regexp_parser/expression/methods/strfregexp.rb
54
57
  - lib/regexp_parser/expression/methods/tests.rb
55
58
  - lib/regexp_parser/expression/methods/traverse.rb