regexp_parser 1.6.0 → 1.8.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,7 +21,7 @@
21
21
  set_close = ']';
22
22
  brackets = set_open | set_close;
23
23
 
24
- comment = ('#' . [^\n]* . '\n');
24
+ comment = ('#' . [^\n]* . '\n'?);
25
25
 
26
26
  class_name_posix = 'alnum' | 'alpha' | 'blank' |
27
27
  'cntrl' | 'digit' | 'graph' |
@@ -62,13 +62,17 @@
62
62
  quantifier_possessive = '?+' | '*+' | '++';
63
63
  quantifier_mode = '?' | '+';
64
64
 
65
- quantifier_interval = range_open . (digit+)? . ','? . (digit+)? .
66
- range_close . quantifier_mode?;
65
+ quantity_exact = (digit+);
66
+ quantity_minimum = (digit+) . ',';
67
+ quantity_maximum = ',' . (digit+);
68
+ quantity_range = (digit+) . ',' . (digit+);
69
+ quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
70
+ quantity_maximum | quantity_range ) . range_close .
71
+ quantifier_mode?;
67
72
 
68
73
  quantifiers = quantifier_greedy | quantifier_reluctant |
69
74
  quantifier_possessive | quantifier_interval;
70
75
 
71
-
72
76
  conditional = '(?(';
73
77
 
74
78
  group_comment = '?#' . [^)]* . group_close;
@@ -114,7 +118,9 @@
114
118
  curlies | parantheses | brackets |
115
119
  line_anchor | quantifier_greedy;
116
120
 
117
- ascii_print = ((0x20..0x7e) - meta_char);
121
+ literal_delimiters = ']' | '}';
122
+
123
+ ascii_print = ((0x20..0x7e) - meta_char - '#');
118
124
  ascii_nonprint = (0x01..0x1f | 0x7f);
119
125
 
120
126
  utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
@@ -122,7 +128,7 @@
122
128
  utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
123
129
 
124
130
  non_literal_escape = char_type_char | anchor_char | escaped_ascii |
125
- group_ref | keep_mark | [xucCM];
131
+ keep_mark | [xucCM];
126
132
 
127
133
  non_set_escape = (anchor_char - 'b') | group_ref | keep_mark |
128
134
  multi_codepoint_char_type | [0-9cCM];
@@ -417,6 +423,10 @@
417
423
  end
418
424
  };
419
425
 
426
+ literal_delimiters {
427
+ append_literal(data, ts, te)
428
+ };
429
+
420
430
  # Character sets
421
431
  # ------------------------------------------------------------------------
422
432
  set_open >set_opened {
@@ -620,10 +630,15 @@
620
630
  end
621
631
  };
622
632
 
623
- quantifier_interval @err(premature_end_error) {
633
+ quantifier_interval {
624
634
  emit(:quantifier, :interval, *text(data, ts, te))
625
635
  };
626
636
 
637
+ # Catch unmatched curly braces as literals
638
+ range_open {
639
+ append_literal(data, ts, te)
640
+ };
641
+
627
642
  # Escaped sequences
628
643
  # ------------------------------------------------------------------------
629
644
  backslash > (backslashed, 1) {
@@ -634,7 +649,9 @@
634
649
  if free_spacing
635
650
  emit(:free_space, :comment, *text(data, ts, te))
636
651
  else
637
- append_literal(data, ts, te)
652
+ # consume only the pound sign (#) and backtrack to do regular scanning
653
+ append_literal(data, ts, ts + 1)
654
+ fexec ts + 1;
638
655
  end
639
656
  };
640
657
 
@@ -722,21 +739,16 @@ class Regexp::Scanner
722
739
  #
723
740
  # This method may raise errors if a syntax error is encountered.
724
741
  # --------------------------------------------------------------------------
725
- def self.scan(input_object, &block)
726
- new.scan(input_object, &block)
742
+ def self.scan(input_object, options: nil, &block)
743
+ new.scan(input_object, options: options, &block)
727
744
  end
728
745
 
729
- def scan(input_object, &block)
746
+ def scan(input_object, options: nil, &block)
730
747
  self.literal = nil
731
748
  stack = []
732
749
 
733
- if input_object.is_a?(Regexp)
734
- input = input_object.source
735
- self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
736
- else
737
- input = input_object
738
- self.free_spacing = false
739
- end
750
+ input = input_object.is_a?(Regexp) ? input_object.source : input_object
751
+ self.free_spacing = free_spacing?(input_object, options)
740
752
  self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
741
753
 
742
754
  data = input.unpack("c*") if input.is_a?(String)
@@ -802,6 +814,18 @@ class Regexp::Scanner
802
814
  attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
803
815
  :group_depth, :set_depth, :conditional_stack
804
816
 
817
+ def free_spacing?(input_object, options)
818
+ if options && !input_object.is_a?(String)
819
+ raise ArgumentError, 'options cannot be supplied unless scanning a String'
820
+ end
821
+
822
+ options = input_object.options if input_object.is_a?(::Regexp)
823
+
824
+ return false unless options
825
+
826
+ options & Regexp::EXTENDED != 0
827
+ end
828
+
805
829
  def in_group?
806
830
  group_depth > 0
807
831
  end
@@ -74,9 +74,9 @@ module Regexp::Syntax
74
74
  end
75
75
 
76
76
  def warn_if_future_version(const_name)
77
- return if comparable_version(const_name) < comparable_version('3.0.0')
77
+ return if comparable_version(const_name) < comparable_version('4.0.0')
78
78
 
79
- warn('This library has only been tested up to Ruby 2.x, '\
79
+ warn('This library has only been tested up to Ruby 3.x, '\
80
80
  "but you are running with #{const_get(const_name).inspect}")
81
81
  end
82
82
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.6.0'
3
+ VERSION = '1.8.2'
4
4
  end
5
5
  end
@@ -32,5 +32,5 @@ Gem::Specification.new do |gem|
32
32
 
33
33
  gem.platform = Gem::Platform::RUBY
34
34
 
35
- gem.required_ruby_version = '>= 1.9.1'
35
+ gem.required_ruby_version = '>= 2.0.0'
36
36
  end
@@ -120,6 +120,13 @@ RSpec.describe(Regexp::MatchLength) do
120
120
  expect { result.next }.to raise_error(StopIteration)
121
121
  end
122
122
 
123
+ it 'is aware of limit option even if called without a block' do
124
+ result = ML.of(/a?/).each(limit: 1)
125
+ expect(result).to be_a(Enumerator)
126
+ expect(result.next).to eq 0
127
+ expect { result.next }.to raise_error(StopIteration)
128
+ end
129
+
123
130
  it 'is limited to 1000 iterations in case there are infinite match lengths' do
124
131
  expect(ML.of(/a*/).first(3000).size).to eq 1000
125
132
  end
@@ -39,6 +39,17 @@ RSpec.describe('Subexpression#traverse') do
39
39
  expect(visits).to eq 9
40
40
  end
41
41
 
42
+ specify('Subexpression#traverse without a block') do
43
+ root = RP.parse(/abc/)
44
+ enum = root.traverse
45
+
46
+ expect(enum).to be_a(Enumerator)
47
+ event, expr, idx = enum.next
48
+ expect(event).to eq(:visit)
49
+ expect(expr).to be_a(Regexp::Expression::Literal)
50
+ expect(idx).to eq(0)
51
+ end
52
+
42
53
  specify('Subexpression#walk alias') do
43
54
  root = RP.parse(/abc/)
44
55
 
@@ -81,6 +92,16 @@ RSpec.describe('Subexpression#traverse') do
81
92
  expect(indices).to eq [0, 0, 1, 0, 2]
82
93
  end
83
94
 
95
+ specify('Subexpression#each_expression without a block') do
96
+ root = RP.parse(/abc/)
97
+ enum = root.each_expression
98
+
99
+ expect(enum).to be_a(Enumerator)
100
+ expr, idx = enum.next
101
+ expect(expr).to be_a(Regexp::Expression::Literal)
102
+ expect(idx).to eq(0)
103
+ end
104
+
84
105
  specify('Subexpression#flat_map without block') do
85
106
  root = RP.parse(/a(b([c-e]+))?/)
86
107
 
@@ -85,44 +85,44 @@ RSpec.describe('Expression#options') do
85
85
  .and change { exp.unicode_classes? }.from(false).to(true)
86
86
  end
87
87
 
88
- RSpec.shared_examples '#options' do |regexp, klass, at: []|
88
+ RSpec.shared_examples '#options' do |regexp, path, klass|
89
89
  it "works for expression class #{klass}" do
90
- exp = RP.parse(/#{regexp.source}/i).dig(*at)
90
+ exp = RP.parse(/#{regexp.source}/i).dig(*path)
91
91
  expect(exp).to be_a(klass)
92
92
  expect(exp).to be_i
93
93
  expect(exp).not_to be_x
94
94
  end
95
95
  end
96
96
 
97
- include_examples '#options', //, Root
98
- include_examples '#options', /a/, Literal, at: [0]
99
- include_examples '#options', /\A/, Anchor::Base, at: [0]
100
- include_examples '#options', /\d/, CharacterType::Base, at: [0]
101
- include_examples '#options', /\n/, EscapeSequence::Base, at: [0]
102
- include_examples '#options', /\K/, Keep::Mark, at: [0]
103
- include_examples '#options', /./, CharacterType::Any, at: [0]
104
- include_examples '#options', /(a)/, Group::Base, at: [0]
105
- include_examples '#options', /(a)/, Literal, at: [0, 0]
106
- include_examples '#options', /(?=a)/, Assertion::Base, at: [0]
107
- include_examples '#options', /(?=a)/, Literal, at: [0, 0]
108
- include_examples '#options', /(a|b)/, Group::Base, at: [0]
109
- include_examples '#options', /(a|b)/, Alternation, at: [0, 0]
110
- include_examples '#options', /(a|b)/, Alternative, at: [0, 0, 0]
111
- include_examples '#options', /(a|b)/, Literal, at: [0, 0, 0, 0]
112
- include_examples '#options', /(a)\1/, Backreference::Base, at: [1]
113
- include_examples '#options', /(a)\k<1>/, Backreference::Number, at: [1]
114
- include_examples '#options', /(a)\g<1>/, Backreference::NumberCall, at: [1]
115
- include_examples '#options', /[a]/, CharacterSet, at: [0]
116
- include_examples '#options', /[a]/, Literal, at: [0, 0]
117
- include_examples '#options', /[a-z]/, CharacterSet::Range, at: [0, 0]
118
- include_examples '#options', /[a-z]/, Literal, at: [0, 0, 0]
119
- include_examples '#options', /[a&&z]/, CharacterSet::Intersection, at: [0, 0]
120
- include_examples '#options', /[a&&z]/, CharacterSet::IntersectedSequence, at: [0, 0, 0]
121
- include_examples '#options', /[a&&z]/, Literal, at: [0, 0, 0, 0]
122
- include_examples '#options', /[[:ascii:]]/, PosixClass, at: [0, 0]
123
- include_examples '#options', /\p{word}/, UnicodeProperty::Base, at: [0]
124
- include_examples '#options', /(a)(?(1)b|c)/, Conditional::Expression, at: [1]
125
- include_examples '#options', /(a)(?(1)b|c)/, Conditional::Condition, at: [1, 0]
126
- include_examples '#options', /(a)(?(1)b|c)/, Conditional::Branch, at: [1, 1]
127
- include_examples '#options', /(a)(?(1)b|c)/, Literal, at: [1, 1, 0]
97
+ include_examples '#options', //, [], Root
98
+ include_examples '#options', /a/, [0], Literal
99
+ include_examples '#options', /\A/, [0], Anchor::Base
100
+ include_examples '#options', /\d/, [0], CharacterType::Base
101
+ include_examples '#options', /\n/, [0], EscapeSequence::Base
102
+ include_examples '#options', /\K/, [0], Keep::Mark
103
+ include_examples '#options', /./, [0], CharacterType::Any
104
+ include_examples '#options', /(a)/, [0], Group::Base
105
+ include_examples '#options', /(a)/, [0, 0], Literal
106
+ include_examples '#options', /(?=a)/, [0], Assertion::Base
107
+ include_examples '#options', /(?=a)/, [0, 0], Literal
108
+ include_examples '#options', /(a|b)/, [0], Group::Base
109
+ include_examples '#options', /(a|b)/, [0, 0], Alternation
110
+ include_examples '#options', /(a|b)/, [0, 0, 0], Alternative
111
+ include_examples '#options', /(a|b)/, [0, 0, 0, 0], Literal
112
+ include_examples '#options', /(a)\1/, [1], Backreference::Base
113
+ include_examples '#options', /(a)\k<1>/, [1], Backreference::Number
114
+ include_examples '#options', /(a)\g<1>/, [1], Backreference::NumberCall
115
+ include_examples '#options', /[a]/, [0], CharacterSet
116
+ include_examples '#options', /[a]/, [0, 0], Literal
117
+ include_examples '#options', /[a-z]/, [0, 0], CharacterSet::Range
118
+ include_examples '#options', /[a-z]/, [0, 0, 0], Literal
119
+ include_examples '#options', /[a&&z]/, [0, 0], CharacterSet::Intersection
120
+ include_examples '#options', /[a&&z]/, [0, 0, 0], CharacterSet::IntersectedSequence
121
+ include_examples '#options', /[a&&z]/, [0, 0, 0, 0], Literal
122
+ include_examples '#options', /[[:ascii:]]/, [0, 0], PosixClass
123
+ include_examples '#options', /\p{word}/, [0], UnicodeProperty::Base
124
+ include_examples '#options', /(a)(?(1)b|c)/, [1], Conditional::Expression
125
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 0], Conditional::Condition
126
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 1], Conditional::Branch
127
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 1, 0], Literal
128
128
  end
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter lexing') do
4
+ include_examples 'lex', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
+
7
+ include_examples 'lex', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
+
10
+ include_examples 'lex', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
+
13
+ include_examples 'lex', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
+
16
+ include_examples 'lex', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
+
19
+ include_examples 'lex', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
+
22
+ include_examples 'lex', '}{+',
23
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
+ 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
+
27
+ include_examples 'lex', '{{var}}',
28
+ 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
+
30
+ include_examples 'lex', 'a{b}c',
31
+ 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a{1,2',
34
+ 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
+
36
+ include_examples 'lex', '({.+})',
37
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
+ 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
+ 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
+ 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
+ 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
+ 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
+
44
+ include_examples 'lex', ']',
45
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
+
47
+ include_examples 'lex', ']]',
48
+ 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
+
50
+ include_examples 'lex', ']\[',
51
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
+ 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
+
54
+ include_examples 'lex', '()',
55
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
+ 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
+
58
+ include_examples 'lex', '{abc:.+}}}[^}]]}',
59
+ 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
+ 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
+ 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
+ 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
+ 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
+ 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
+ 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
+ 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
+ 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to parse') do
4
+ it 'raises if if parsing from a Regexp and options are passed' do
5
+ expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
6
+ ArgumentError,
7
+ 'options cannot be supplied unless parsing a String'
8
+ )
9
+ end
10
+
11
+ it 'sets options if parsing from a String' do
12
+ root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13
+
14
+ expect(root.options).to eq(m: true, x: true)
15
+ end
16
+
17
+ it 'allows options to not be supplied when parsing from a Regexp' do
18
+ root = RP.parse(/a+/ix)
19
+
20
+ expect(root.options).to eq(i: true, x: true)
21
+ end
22
+
23
+ it 'has an empty option-hash when parsing from a String and passing no options' do
24
+ root = RP.parse('a+')
25
+
26
+ expect(root.options).to be_empty
27
+ end
28
+ end
@@ -35,6 +35,7 @@ RSpec.describe('Quantifier parsing') do
35
35
  include_examples 'quantifier', /a{4}b/, '{4}', :greedy, :interval, 4, 4
36
36
  include_examples 'quantifier', /a{4}?b/, '{4}?', :reluctant, :interval, 4, 4
37
37
  include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
38
+ include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
38
39
 
39
40
  specify('mode-checking methods') do
40
41
  exp = RP.parse(/a??/).first
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter scanning') do
4
+ include_examples 'scan', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1]
6
+
7
+ include_examples 'scan', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2]
9
+
10
+ include_examples 'scan', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1]
12
+
13
+ include_examples 'scan', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2]
15
+
16
+ include_examples 'scan', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2]
18
+
19
+ include_examples 'scan', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2]
21
+
22
+ include_examples 'scan', '}{+',
23
+ 0 => [:literal, :literal, '}{', 0, 2]
24
+
25
+ include_examples 'scan', '{{var}}',
26
+ 0 => [:literal, :literal, '{{var}}', 0, 7]
27
+
28
+ include_examples 'scan', 'a{1,2',
29
+ 0 => [:literal, :literal, 'a{1,2', 0, 5]
30
+
31
+ include_examples 'scan', '({.+})',
32
+ 0 => [:group, :capture, '(', 0, 1],
33
+ 1 => [:literal, :literal, '{', 1, 2],
34
+ 2 => [:meta, :dot, '.', 2, 3],
35
+ 3 => [:quantifier, :one_or_more, '+', 3, 4],
36
+ 4 => [:literal, :literal, '}', 4, 5],
37
+ 5 => [:group, :close, ')', 5, 6]
38
+
39
+ include_examples 'scan', ']',
40
+ 0 => [:literal, :literal, ']', 0, 1]
41
+
42
+ include_examples 'scan', ']]',
43
+ 0 => [:literal, :literal, ']]', 0, 2]
44
+
45
+ include_examples 'scan', ']\[',
46
+ 0 => [:literal, :literal, ']', 0, 1],
47
+ 1 => [:escape, :set_open, '\[', 1, 3]
48
+
49
+ include_examples 'scan', '()',
50
+ 0 => [:group, :capture, '(', 0, 1],
51
+ 1 => [:group, :close, ')', 1, 2]
52
+ end