regexp_parser 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Quantifier
3
+ MODES = [:greedy, :possessive, :reluctant]
4
+
4
5
  attr_reader :token, :text, :min, :max, :mode
5
6
 
6
7
  def initialize(token, text, min, max, mode)
@@ -30,6 +31,14 @@ module Regexp::Expression
30
31
  max: max,
31
32
  }
32
33
  end
33
- end
34
34
 
35
+ MODES.each do |mode|
36
+ class_eval <<-RUBY, __FILE__, __LINE__ + 1
37
+ def #{mode}?
38
+ mode.equal?(:#{mode})
39
+ end
40
+ RUBY
41
+ end
42
+ alias :lazy? :reluctant?
43
+ end
35
44
  end
@@ -4,20 +4,44 @@ module Regexp::Expression
4
4
  # quantifiers, as it applies them to its last element instead of itself as
5
5
  # a whole subexpression.
6
6
  #
7
- # Used as the base class for the Alternation alternatives and Conditional
8
- # branches.
7
+ # Used as the base class for the Alternation alternatives, Conditional
8
+ # branches, and CharacterSet::Intersection intersected sequences.
9
9
  class Sequence < Regexp::Expression::Subexpression
10
- def initialize(level, set_level, conditional_level)
11
- super Regexp::Token.new(
12
- :expression,
13
- :sequence,
14
- '',
15
- nil, # ts
16
- nil, # te
17
- level,
18
- set_level,
19
- conditional_level
20
- )
10
+ # TODO: this override is here for backwards compatibility, remove in 2.0.0
11
+ def initialize(*args)
12
+ if args.count == 3
13
+ warn('WARNING: Sequence.new without a Regexp::Token argument is '\
14
+ 'deprecated and will be removed in 2.0.0.')
15
+ return self.class.at_levels(*args)
16
+ end
17
+ super
18
+ end
19
+
20
+ class << self
21
+ def add_to(subexpression, options = {})
22
+ sequence = at_levels(
23
+ subexpression.level,
24
+ subexpression.set_level,
25
+ options[:conditional_level] || subexpression.conditional_level
26
+ )
27
+ sequence.nesting_level = subexpression.nesting_level + 1
28
+ subexpression.expressions << sequence
29
+ sequence
30
+ end
31
+
32
+ def at_levels(level, set_level, conditional_level)
33
+ token = Regexp::Token.new(
34
+ :expression,
35
+ :sequence,
36
+ '',
37
+ nil, # ts
38
+ nil, # te
39
+ level,
40
+ set_level,
41
+ conditional_level
42
+ )
43
+ new(token)
44
+ end
21
45
  end
22
46
 
23
47
  def text
@@ -15,20 +15,15 @@ module Regexp::Expression
15
15
  end
16
16
 
17
17
  def add_sequence
18
- exp = self.class::OPERAND.new(level, set_level, conditional_level)
19
- exp.nesting_level = nesting_level + 1
20
- expressions << exp
21
- exp
18
+ self.class::OPERAND.add_to(self)
22
19
  end
23
20
 
24
21
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
25
22
  sequences.last.last.quantify(token, text, min, max, mode)
26
- sequences.last.last.quantify(token, text, min, max, mode)
27
23
  end
28
24
 
29
25
  def to_s(format = :full)
30
26
  sequences.map { |e| e.to_s(format) }.join(text)
31
- sequences.map { |e| e.to_s(format) }.join(text)
32
27
  end
33
28
  end
34
29
  end
@@ -1,6 +1,8 @@
1
1
  module Regexp::Expression
2
2
 
3
3
  class Subexpression < Regexp::Expression::Base
4
+ include Enumerable
5
+
4
6
  attr_accessor :expressions
5
7
 
6
8
  def initialize(token, options = {})
@@ -24,9 +26,18 @@ module Regexp::Expression
24
26
  end
25
27
  end
26
28
 
27
- %w[[] all? any? at collect count each each_with_index empty?
28
- fetch find first index join last length map values_at].each do |m|
29
- define_method(m) { |*args, &block| expressions.send(m, *args, &block) }
29
+ %w[[] at each empty? fetch index join last length values_at].each do |method|
30
+ class_eval <<-RUBY, __FILE__, __LINE__ + 1
31
+ def #{method}(*args, &block)
32
+ expressions.#{method}(*args, &block)
33
+ end
34
+ RUBY
35
+ end
36
+
37
+ def dig(*indices)
38
+ exp = self
39
+ indices.each { |idx| exp = exp.nil? || exp.terminal? ? nil : exp[idx] }
40
+ exp
30
41
  end
31
42
 
32
43
  def te
@@ -41,7 +52,7 @@ module Regexp::Expression
41
52
  end
42
53
 
43
54
  def to_h
44
- super.merge({
55
+ attributes.merge({
45
56
  text: to_s(:base),
46
57
  expressions: expressions.map(&:to_h)
47
58
  })
@@ -23,7 +23,7 @@ class Regexp::Parser
23
23
  end
24
24
 
25
25
  def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
26
- root = Root.new(options_from_input(input))
26
+ root = Root.build(options_from_input(input))
27
27
 
28
28
  self.root = root
29
29
  self.node = root
@@ -89,8 +89,7 @@ class Regexp::Parser
89
89
 
90
90
  def nest_conditional(exp)
91
91
  conditional_nesting.push(exp)
92
- node << exp
93
- self.node = exp
92
+ nest(exp)
94
93
  end
95
94
 
96
95
  def parse_token(token)
@@ -216,6 +215,7 @@ class Regexp::Parser
216
215
  self.node = conditional_nesting.last.branches.last
217
216
  when :close
218
217
  conditional_nesting.pop
218
+ decrease_nesting
219
219
 
220
220
  self.node =
221
221
  if conditional_nesting.empty?
@@ -496,25 +496,34 @@ class Regexp::Parser
496
496
  negative ||= ''
497
497
  self.switching_options = token.token.equal?(:options_switch)
498
498
 
499
- new_options = active_opts.dup
499
+ opt_changes = {}
500
+ new_active_opts = active_opts.dup
500
501
 
501
502
  # Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
502
503
  %w[i m x].each do |flag|
503
- new_options[flag.to_sym] = true if positive.include?(flag)
504
- new_options.delete(flag.to_sym) if negative.include?(flag)
504
+ if positive.include?(flag)
505
+ opt_changes[flag.to_sym] = new_active_opts[flag.to_sym] = true
506
+ end
507
+ if negative.include?(flag)
508
+ opt_changes[flag.to_sym] = false
509
+ new_active_opts.delete(flag.to_sym)
510
+ end
505
511
  end
506
512
 
507
513
  # Any encoding flag overrides all previous encoding flags. If there are
508
514
  # multiple encoding flags in an options string, the last one wins.
509
515
  # E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
510
516
  if (flag = positive.reverse[/[adu]/])
511
- %w[a d u].each { |key| new_options.delete(key.to_sym) }
512
- new_options[flag.to_sym] = true
517
+ %w[a d u].each { |key| new_active_opts.delete(key.to_sym) }
518
+ opt_changes[flag.to_sym] = new_active_opts[flag.to_sym] = true
513
519
  end
514
520
 
515
- options_stack << new_options
521
+ options_stack << new_active_opts
522
+
523
+ options_group = Group::Options.new(token, active_opts)
524
+ options_group.option_changes = opt_changes
516
525
 
517
- nest(Group::Options.new(token, active_opts))
526
+ nest(options_group)
518
527
  end
519
528
 
520
529
  def open_group(token)
@@ -2131,14 +2131,14 @@ te = p+1
2131
2131
  when /^\\([gk])''/ # single quotes
2132
2132
  empty_backref_error("ref/call (sq)")
2133
2133
 
2134
- when /^\\([gk])<[^\d-](\w+)?>/ # angle-brackets
2134
+ when /^\\([gk])<[^\d+-]\w*>/ # angle-brackets
2135
2135
  if $1 == 'k'
2136
2136
  emit(:backref, :name_ref_ab, text, ts, te)
2137
2137
  else
2138
2138
  emit(:backref, :name_call_ab, text, ts, te)
2139
2139
  end
2140
2140
 
2141
- when /^\\([gk])'[^\d-](\w+)?'/ #single quotes
2141
+ when /^\\([gk])'[^\d+-]\w*'/ #single quotes
2142
2142
  if $1 == 'k'
2143
2143
  emit(:backref, :name_ref_sq, text, ts, te)
2144
2144
  else
@@ -2159,30 +2159,30 @@ te = p+1
2159
2159
  emit(:backref, :number_call_sq, text, ts, te)
2160
2160
  end
2161
2161
 
2162
- when /^\\([gk])<-\d+>/ # angle-brackets
2162
+ when /^\\(?:g<\+|g<-|(k)<-)\d+>/ # angle-brackets
2163
2163
  if $1 == 'k'
2164
2164
  emit(:backref, :number_rel_ref_ab, text, ts, te)
2165
2165
  else
2166
2166
  emit(:backref, :number_rel_call_ab, text, ts, te)
2167
2167
  end
2168
2168
 
2169
- when /^\\([gk])'-\d+'/ # single quotes
2169
+ when /^\\(?:g'\+|g'-|(k)'-)\d+'/ # single quotes
2170
2170
  if $1 == 'k'
2171
2171
  emit(:backref, :number_rel_ref_sq, text, ts, te)
2172
2172
  else
2173
2173
  emit(:backref, :number_rel_call_sq, text, ts, te)
2174
2174
  end
2175
2175
 
2176
- when /^\\k<[^\d-](\w+)?[+\-]\d+>/ # angle-brackets
2176
+ when /^\\k<[^\d+\-]\w*[+\-]\d+>/ # angle-brackets
2177
2177
  emit(:backref, :name_recursion_ref_ab, text, ts, te)
2178
2178
 
2179
- when /^\\k'[^\d-](\w+)?[+\-]\d+'/ # single-quotes
2179
+ when /^\\k'[^\d+\-]\w*[+\-]\d+'/ # single-quotes
2180
2180
  emit(:backref, :name_recursion_ref_sq, text, ts, te)
2181
2181
 
2182
- when /^\\([gk])<-?\d+[+\-]\d+>/ # angle-brackets
2182
+ when /^\\([gk])<[+\-]?\d+[+\-]\d+>/ # angle-brackets
2183
2183
  emit(:backref, :number_recursion_ref_ab, text, ts, te)
2184
2184
 
2185
- when /^\\([gk])'-?\d+[+\-]\d+'/ # single-quotes
2185
+ when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
2186
2186
  emit(:backref, :number_recursion_ref_sq, text, ts, te)
2187
2187
 
2188
2188
  else
@@ -582,14 +582,14 @@
582
582
  when /^\\([gk])''/ # single quotes
583
583
  empty_backref_error("ref/call (sq)")
584
584
 
585
- when /^\\([gk])<[^\d-](\w+)?>/ # angle-brackets
585
+ when /^\\([gk])<[^\d+-]\w*>/ # angle-brackets
586
586
  if $1 == 'k'
587
587
  emit(:backref, :name_ref_ab, text, ts, te)
588
588
  else
589
589
  emit(:backref, :name_call_ab, text, ts, te)
590
590
  end
591
591
 
592
- when /^\\([gk])'[^\d-](\w+)?'/ #single quotes
592
+ when /^\\([gk])'[^\d+-]\w*'/ #single quotes
593
593
  if $1 == 'k'
594
594
  emit(:backref, :name_ref_sq, text, ts, te)
595
595
  else
@@ -610,30 +610,30 @@
610
610
  emit(:backref, :number_call_sq, text, ts, te)
611
611
  end
612
612
 
613
- when /^\\([gk])<-\d+>/ # angle-brackets
613
+ when /^\\(?:g<\+|g<-|(k)<-)\d+>/ # angle-brackets
614
614
  if $1 == 'k'
615
615
  emit(:backref, :number_rel_ref_ab, text, ts, te)
616
616
  else
617
617
  emit(:backref, :number_rel_call_ab, text, ts, te)
618
618
  end
619
619
 
620
- when /^\\([gk])'-\d+'/ # single quotes
620
+ when /^\\(?:g'\+|g'-|(k)'-)\d+'/ # single quotes
621
621
  if $1 == 'k'
622
622
  emit(:backref, :number_rel_ref_sq, text, ts, te)
623
623
  else
624
624
  emit(:backref, :number_rel_call_sq, text, ts, te)
625
625
  end
626
626
 
627
- when /^\\k<[^\d-](\w+)?[+\-]\d+>/ # angle-brackets
627
+ when /^\\k<[^\d+\-]\w*[+\-]\d+>/ # angle-brackets
628
628
  emit(:backref, :name_recursion_ref_ab, text, ts, te)
629
629
 
630
- when /^\\k'[^\d-](\w+)?[+\-]\d+'/ # single-quotes
630
+ when /^\\k'[^\d+\-]\w*[+\-]\d+'/ # single-quotes
631
631
  emit(:backref, :name_recursion_ref_sq, text, ts, te)
632
632
 
633
- when /^\\([gk])<-?\d+[+\-]\d+>/ # angle-brackets
633
+ when /^\\([gk])<[+\-]?\d+[+\-]\d+>/ # angle-brackets
634
634
  emit(:backref, :number_recursion_ref_ab, text, ts, te)
635
635
 
636
- when /^\\([gk])'-?\d+[+\-]\d+'/ # single-quotes
636
+ when /^\\([gk])'[+\-]?\d+[+\-]\d+'/ # single-quotes
637
637
  emit(:backref, :number_recursion_ref_sq, text, ts, te)
638
638
 
639
639
  else
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.0.0'
3
+ VERSION = '1.2.0'
4
4
  end
5
5
  end
@@ -46,4 +46,13 @@ class ExpressionSubexpression < Test::Unit::TestCase
46
46
 
47
47
  assert tests.empty?
48
48
  end
49
+
50
+ def test_subexpression_dig
51
+ root = RP.parse(/(((a)))/)
52
+
53
+ assert_equal '(((a)))', root.dig(0).to_s
54
+ assert_equal 'a', root.dig(0, 0, 0, 0).to_s
55
+ assert_nil root.dig(0, 0, 0, 0, 0)
56
+ assert_nil root.dig(3, 7)
57
+ end
49
58
  end
@@ -26,6 +26,9 @@ class LexerRefCalls < Test::Unit::TestCase
26
26
  '(abc)\g<-1>' => [3, :backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0],
27
27
  "(abc)\\g'-1'" => [3, :backref, :number_rel_call, "\\g'-1'", 5, 11, 0, 0, 0],
28
28
 
29
+ '(abc)\g<+1>' => [3, :backref, :number_rel_call, '\g<+1>', 5, 11, 0, 0, 0],
30
+ "(abc)\\g'+1'" => [3, :backref, :number_rel_call, "\\g'+1'", 5, 11, 0, 0, 0],
31
+
29
32
  # Group back-references, with nesting level
30
33
  '(?<X>abc)\k<X-0>' => [3, :backref, :name_recursion_ref, '\k<X-0>', 9, 16, 0, 0, 0],
31
34
  "(?<X>abc)\\k'X-0'" => [3, :backref, :name_recursion_ref, "\\k'X-0'", 9, 16, 0, 0, 0],
@@ -1,9 +1,9 @@
1
1
  require File.expand_path("../../helpers", __FILE__)
2
2
 
3
- %w{
3
+ %w[
4
4
  alternation anchors errors escapes free_space groups
5
5
  posix_classes properties quantifiers refcalls sets types
6
- }.each do|tc|
6
+ ].each do |tc|
7
7
  require File.expand_path("../test_#{tc}", __FILE__)
8
8
  end
9
9
 
@@ -11,7 +11,7 @@ require File.expand_path('../set/test_ranges.rb', __FILE__)
11
11
  require File.expand_path('../set/test_intersections.rb', __FILE__)
12
12
 
13
13
  if RUBY_VERSION >= '2.0.0'
14
- %w{conditionals keep}.each do|tc|
14
+ %w[conditionals keep].each do |tc|
15
15
  require File.expand_path("../test_#{tc}", __FILE__)
16
16
  end
17
17
  end
@@ -11,12 +11,12 @@ class TestParserConditionals < Test::Unit::TestCase
11
11
  assert exp.is_a?(Conditional::Expression),
12
12
  "Expected Condition, but got #{exp.class.name}"
13
13
 
14
- assert_equal exp.type, :conditional
15
- assert_equal exp.token, :open
16
- assert_equal exp.text, '(?'
14
+ assert_equal exp.type, :conditional
15
+ assert_equal exp.token, :open
16
+ assert_equal exp.text, '(?'
17
+ assert_equal exp.reference, 'A'
17
18
  end
18
19
 
19
-
20
20
  def test_parse_conditional_condition
21
21
  regexp = /(?<A>a)(?(<A>)T|F)/
22
22
 
@@ -26,11 +26,26 @@ class TestParserConditionals < Test::Unit::TestCase
26
26
  assert exp.is_a?(Conditional::Condition),
27
27
  "Expected Condition, but got #{exp.class.name}"
28
28
 
29
- assert_equal exp.type, :conditional
30
- assert_equal exp.token, :condition
31
- assert_equal exp.text, '(<A>)'
29
+ assert_equal exp.type, :conditional
30
+ assert_equal exp.token, :condition
31
+ assert_equal exp.text, '(<A>)'
32
+ assert_equal exp.reference, 'A'
32
33
  end
33
34
 
35
+ def test_parse_conditional_condition_with_number_ref
36
+ regexp = /(a)(?(1)T|F)/
37
+
38
+ root = RP.parse(regexp, 'ruby/2.0')
39
+ exp = root[1].condition
40
+
41
+ assert exp.is_a?(Conditional::Condition),
42
+ "Expected Condition, but got #{exp.class.name}"
43
+
44
+ assert_equal exp.type, :conditional
45
+ assert_equal exp.token, :condition
46
+ assert_equal exp.text, '(1)'
47
+ assert_equal exp.reference, 1
48
+ end
34
49
 
35
50
  def test_parse_conditional_nested_groups
36
51
  regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
@@ -91,7 +106,6 @@ class TestParserConditionals < Test::Unit::TestCase
91
106
  end
92
107
  end
93
108
 
94
-
95
109
  def test_parse_conditional_nested_alternation
96
110
  regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
97
111
 
@@ -114,7 +128,6 @@ class TestParserConditionals < Test::Unit::TestCase
114
128
  end
115
129
  end
116
130
 
117
-
118
131
  def test_parse_conditional_extra_separator
119
132
  regexp = /(?<A>a)(?(<A>)T|)/
120
133
 
@@ -137,6 +150,31 @@ class TestParserConditionals < Test::Unit::TestCase
137
150
  assert_equal '', seq_2.to_s
138
151
  end
139
152
 
153
+ def test_parse_conditional_quantified
154
+ regexp = /(foo)(?(1)\d|(\w)){42}/
155
+
156
+ root = RP.parse(regexp, 'ruby/2.0')
157
+ conditional = root[1]
158
+
159
+ assert conditional.quantified?
160
+ assert_equal '{42}', conditional.quantifier.text
161
+ assert_equal '(?(1)\d|(\w)){42}', conditional.to_s
162
+ refute conditional.branches.any?(&:quantified?)
163
+ end
164
+
165
+ def test_parse_conditional_branch_content_quantified
166
+ regexp = /(foo)(?(1)\d{23}|(\w){42})/
167
+
168
+ root = RP.parse(regexp, 'ruby/2.0')
169
+ conditional = root[1]
170
+
171
+ refute conditional.quantified?
172
+ refute conditional.branches.any?(&:quantified?)
173
+ assert conditional.branches[0][0].quantified?
174
+ assert_equal '{23}', conditional.branches[0][0].quantifier.text
175
+ assert conditional.branches[1][0].quantified?
176
+ assert_equal '{42}', conditional.branches[1][0].quantifier.text
177
+ end
140
178
 
141
179
  # For source (text) expressions only, ruby raises an error otherwise.
142
180
  def test_parse_conditional_excessive_branches
@@ -146,5 +184,4 @@ class TestParserConditionals < Test::Unit::TestCase
146
184
  RP.parse(regexp, 'ruby/2.0')
147
185
  }
148
186
  end
149
-
150
187
  end