regexp_parser 1.6.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,7 +21,7 @@
21
21
  set_close = ']';
22
22
  brackets = set_open | set_close;
23
23
 
24
- comment = ('#' . [^\n]* . '\n');
24
+ comment = ('#' . [^\n]* . '\n'?);
25
25
 
26
26
  class_name_posix = 'alnum' | 'alpha' | 'blank' |
27
27
  'cntrl' | 'digit' | 'graph' |
@@ -62,13 +62,17 @@
62
62
  quantifier_possessive = '?+' | '*+' | '++';
63
63
  quantifier_mode = '?' | '+';
64
64
 
65
- quantifier_interval = range_open . (digit+)? . ','? . (digit+)? .
66
- range_close . quantifier_mode?;
65
+ quantity_exact = (digit+);
66
+ quantity_minimum = (digit+) . ',';
67
+ quantity_maximum = ',' . (digit+);
68
+ quantity_range = (digit+) . ',' . (digit+);
69
+ quantifier_interval = range_open . ( quantity_exact | quantity_minimum |
70
+ quantity_maximum | quantity_range ) . range_close .
71
+ quantifier_mode?;
67
72
 
68
73
  quantifiers = quantifier_greedy | quantifier_reluctant |
69
74
  quantifier_possessive | quantifier_interval;
70
75
 
71
-
72
76
  conditional = '(?(';
73
77
 
74
78
  group_comment = '?#' . [^)]* . group_close;
@@ -114,7 +118,9 @@
114
118
  curlies | parantheses | brackets |
115
119
  line_anchor | quantifier_greedy;
116
120
 
117
- ascii_print = ((0x20..0x7e) - meta_char);
121
+ literal_delimiters = ']' | '}';
122
+
123
+ ascii_print = ((0x20..0x7e) - meta_char - '#');
118
124
  ascii_nonprint = (0x01..0x1f | 0x7f);
119
125
 
120
126
  utf8_2_byte = (0xc2..0xdf 0x80..0xbf);
@@ -122,7 +128,7 @@
122
128
  utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf);
123
129
 
124
130
  non_literal_escape = char_type_char | anchor_char | escaped_ascii |
125
- group_ref | keep_mark | [xucCM];
131
+ keep_mark | [xucCM];
126
132
 
127
133
  non_set_escape = (anchor_char - 'b') | group_ref | keep_mark |
128
134
  multi_codepoint_char_type | [0-9cCM];
@@ -417,6 +423,10 @@
417
423
  end
418
424
  };
419
425
 
426
+ literal_delimiters {
427
+ append_literal(data, ts, te)
428
+ };
429
+
420
430
  # Character sets
421
431
  # ------------------------------------------------------------------------
422
432
  set_open >set_opened {
@@ -620,10 +630,15 @@
620
630
  end
621
631
  };
622
632
 
623
- quantifier_interval @err(premature_end_error) {
633
+ quantifier_interval {
624
634
  emit(:quantifier, :interval, *text(data, ts, te))
625
635
  };
626
636
 
637
+ # Catch unmatched curly braces as literals
638
+ range_open {
639
+ append_literal(data, ts, te)
640
+ };
641
+
627
642
  # Escaped sequences
628
643
  # ------------------------------------------------------------------------
629
644
  backslash > (backslashed, 1) {
@@ -634,7 +649,9 @@
634
649
  if free_spacing
635
650
  emit(:free_space, :comment, *text(data, ts, te))
636
651
  else
637
- append_literal(data, ts, te)
652
+ # consume only the pound sign (#) and backtrack to do regular scanning
653
+ append_literal(data, ts, ts + 1)
654
+ fexec ts + 1;
638
655
  end
639
656
  };
640
657
 
@@ -722,21 +739,16 @@ class Regexp::Scanner
722
739
  #
723
740
  # This method may raise errors if a syntax error is encountered.
724
741
  # --------------------------------------------------------------------------
725
- def self.scan(input_object, &block)
726
- new.scan(input_object, &block)
742
+ def self.scan(input_object, options: nil, &block)
743
+ new.scan(input_object, options: options, &block)
727
744
  end
728
745
 
729
- def scan(input_object, &block)
746
+ def scan(input_object, options: nil, &block)
730
747
  self.literal = nil
731
748
  stack = []
732
749
 
733
- if input_object.is_a?(Regexp)
734
- input = input_object.source
735
- self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
736
- else
737
- input = input_object
738
- self.free_spacing = false
739
- end
750
+ input = input_object.is_a?(Regexp) ? input_object.source : input_object
751
+ self.free_spacing = free_spacing?(input_object, options)
740
752
  self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
741
753
 
742
754
  data = input.unpack("c*") if input.is_a?(String)
@@ -802,6 +814,18 @@ class Regexp::Scanner
802
814
  attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
803
815
  :group_depth, :set_depth, :conditional_stack
804
816
 
817
+ def free_spacing?(input_object, options)
818
+ if options && !input_object.is_a?(String)
819
+ raise ArgumentError, 'options cannot be supplied unless scanning a String'
820
+ end
821
+
822
+ options = input_object.options if input_object.is_a?(::Regexp)
823
+
824
+ return false unless options
825
+
826
+ options & Regexp::EXTENDED != 0
827
+ end
828
+
805
829
  def in_group?
806
830
  group_depth > 0
807
831
  end
@@ -74,9 +74,9 @@ module Regexp::Syntax
74
74
  end
75
75
 
76
76
  def warn_if_future_version(const_name)
77
- return if comparable_version(const_name) < comparable_version('3.0.0')
77
+ return if comparable_version(const_name) < comparable_version('4.0.0')
78
78
 
79
- warn('This library has only been tested up to Ruby 2.x, '\
79
+ warn('This library has only been tested up to Ruby 3.x, '\
80
80
  "but you are running with #{const_get(const_name).inspect}")
81
81
  end
82
82
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.6.0'
3
+ VERSION = '1.8.2'
4
4
  end
5
5
  end
@@ -32,5 +32,5 @@ Gem::Specification.new do |gem|
32
32
 
33
33
  gem.platform = Gem::Platform::RUBY
34
34
 
35
- gem.required_ruby_version = '>= 1.9.1'
35
+ gem.required_ruby_version = '>= 2.0.0'
36
36
  end
@@ -120,6 +120,13 @@ RSpec.describe(Regexp::MatchLength) do
120
120
  expect { result.next }.to raise_error(StopIteration)
121
121
  end
122
122
 
123
+ it 'is aware of limit option even if called without a block' do
124
+ result = ML.of(/a?/).each(limit: 1)
125
+ expect(result).to be_a(Enumerator)
126
+ expect(result.next).to eq 0
127
+ expect { result.next }.to raise_error(StopIteration)
128
+ end
129
+
123
130
  it 'is limited to 1000 iterations in case there are infinite match lengths' do
124
131
  expect(ML.of(/a*/).first(3000).size).to eq 1000
125
132
  end
@@ -39,6 +39,17 @@ RSpec.describe('Subexpression#traverse') do
39
39
  expect(visits).to eq 9
40
40
  end
41
41
 
42
+ specify('Subexpression#traverse without a block') do
43
+ root = RP.parse(/abc/)
44
+ enum = root.traverse
45
+
46
+ expect(enum).to be_a(Enumerator)
47
+ event, expr, idx = enum.next
48
+ expect(event).to eq(:visit)
49
+ expect(expr).to be_a(Regexp::Expression::Literal)
50
+ expect(idx).to eq(0)
51
+ end
52
+
42
53
  specify('Subexpression#walk alias') do
43
54
  root = RP.parse(/abc/)
44
55
 
@@ -81,6 +92,16 @@ RSpec.describe('Subexpression#traverse') do
81
92
  expect(indices).to eq [0, 0, 1, 0, 2]
82
93
  end
83
94
 
95
+ specify('Subexpression#each_expression without a block') do
96
+ root = RP.parse(/abc/)
97
+ enum = root.each_expression
98
+
99
+ expect(enum).to be_a(Enumerator)
100
+ expr, idx = enum.next
101
+ expect(expr).to be_a(Regexp::Expression::Literal)
102
+ expect(idx).to eq(0)
103
+ end
104
+
84
105
  specify('Subexpression#flat_map without block') do
85
106
  root = RP.parse(/a(b([c-e]+))?/)
86
107
 
@@ -85,44 +85,44 @@ RSpec.describe('Expression#options') do
85
85
  .and change { exp.unicode_classes? }.from(false).to(true)
86
86
  end
87
87
 
88
- RSpec.shared_examples '#options' do |regexp, klass, at: []|
88
+ RSpec.shared_examples '#options' do |regexp, path, klass|
89
89
  it "works for expression class #{klass}" do
90
- exp = RP.parse(/#{regexp.source}/i).dig(*at)
90
+ exp = RP.parse(/#{regexp.source}/i).dig(*path)
91
91
  expect(exp).to be_a(klass)
92
92
  expect(exp).to be_i
93
93
  expect(exp).not_to be_x
94
94
  end
95
95
  end
96
96
 
97
- include_examples '#options', //, Root
98
- include_examples '#options', /a/, Literal, at: [0]
99
- include_examples '#options', /\A/, Anchor::Base, at: [0]
100
- include_examples '#options', /\d/, CharacterType::Base, at: [0]
101
- include_examples '#options', /\n/, EscapeSequence::Base, at: [0]
102
- include_examples '#options', /\K/, Keep::Mark, at: [0]
103
- include_examples '#options', /./, CharacterType::Any, at: [0]
104
- include_examples '#options', /(a)/, Group::Base, at: [0]
105
- include_examples '#options', /(a)/, Literal, at: [0, 0]
106
- include_examples '#options', /(?=a)/, Assertion::Base, at: [0]
107
- include_examples '#options', /(?=a)/, Literal, at: [0, 0]
108
- include_examples '#options', /(a|b)/, Group::Base, at: [0]
109
- include_examples '#options', /(a|b)/, Alternation, at: [0, 0]
110
- include_examples '#options', /(a|b)/, Alternative, at: [0, 0, 0]
111
- include_examples '#options', /(a|b)/, Literal, at: [0, 0, 0, 0]
112
- include_examples '#options', /(a)\1/, Backreference::Base, at: [1]
113
- include_examples '#options', /(a)\k<1>/, Backreference::Number, at: [1]
114
- include_examples '#options', /(a)\g<1>/, Backreference::NumberCall, at: [1]
115
- include_examples '#options', /[a]/, CharacterSet, at: [0]
116
- include_examples '#options', /[a]/, Literal, at: [0, 0]
117
- include_examples '#options', /[a-z]/, CharacterSet::Range, at: [0, 0]
118
- include_examples '#options', /[a-z]/, Literal, at: [0, 0, 0]
119
- include_examples '#options', /[a&&z]/, CharacterSet::Intersection, at: [0, 0]
120
- include_examples '#options', /[a&&z]/, CharacterSet::IntersectedSequence, at: [0, 0, 0]
121
- include_examples '#options', /[a&&z]/, Literal, at: [0, 0, 0, 0]
122
- include_examples '#options', /[[:ascii:]]/, PosixClass, at: [0, 0]
123
- include_examples '#options', /\p{word}/, UnicodeProperty::Base, at: [0]
124
- include_examples '#options', /(a)(?(1)b|c)/, Conditional::Expression, at: [1]
125
- include_examples '#options', /(a)(?(1)b|c)/, Conditional::Condition, at: [1, 0]
126
- include_examples '#options', /(a)(?(1)b|c)/, Conditional::Branch, at: [1, 1]
127
- include_examples '#options', /(a)(?(1)b|c)/, Literal, at: [1, 1, 0]
97
+ include_examples '#options', //, [], Root
98
+ include_examples '#options', /a/, [0], Literal
99
+ include_examples '#options', /\A/, [0], Anchor::Base
100
+ include_examples '#options', /\d/, [0], CharacterType::Base
101
+ include_examples '#options', /\n/, [0], EscapeSequence::Base
102
+ include_examples '#options', /\K/, [0], Keep::Mark
103
+ include_examples '#options', /./, [0], CharacterType::Any
104
+ include_examples '#options', /(a)/, [0], Group::Base
105
+ include_examples '#options', /(a)/, [0, 0], Literal
106
+ include_examples '#options', /(?=a)/, [0], Assertion::Base
107
+ include_examples '#options', /(?=a)/, [0, 0], Literal
108
+ include_examples '#options', /(a|b)/, [0], Group::Base
109
+ include_examples '#options', /(a|b)/, [0, 0], Alternation
110
+ include_examples '#options', /(a|b)/, [0, 0, 0], Alternative
111
+ include_examples '#options', /(a|b)/, [0, 0, 0, 0], Literal
112
+ include_examples '#options', /(a)\1/, [1], Backreference::Base
113
+ include_examples '#options', /(a)\k<1>/, [1], Backreference::Number
114
+ include_examples '#options', /(a)\g<1>/, [1], Backreference::NumberCall
115
+ include_examples '#options', /[a]/, [0], CharacterSet
116
+ include_examples '#options', /[a]/, [0, 0], Literal
117
+ include_examples '#options', /[a-z]/, [0, 0], CharacterSet::Range
118
+ include_examples '#options', /[a-z]/, [0, 0, 0], Literal
119
+ include_examples '#options', /[a&&z]/, [0, 0], CharacterSet::Intersection
120
+ include_examples '#options', /[a&&z]/, [0, 0, 0], CharacterSet::IntersectedSequence
121
+ include_examples '#options', /[a&&z]/, [0, 0, 0, 0], Literal
122
+ include_examples '#options', /[[:ascii:]]/, [0, 0], PosixClass
123
+ include_examples '#options', /\p{word}/, [0], UnicodeProperty::Base
124
+ include_examples '#options', /(a)(?(1)b|c)/, [1], Conditional::Expression
125
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 0], Conditional::Condition
126
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 1], Conditional::Branch
127
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 1, 0], Literal
128
128
  end
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter lexing') do
4
+ include_examples 'lex', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
+
7
+ include_examples 'lex', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
+
10
+ include_examples 'lex', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
+
13
+ include_examples 'lex', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
+
16
+ include_examples 'lex', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
+
19
+ include_examples 'lex', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
+
22
+ include_examples 'lex', '}{+',
23
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
+ 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
+
27
+ include_examples 'lex', '{{var}}',
28
+ 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
+
30
+ include_examples 'lex', 'a{b}c',
31
+ 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a{1,2',
34
+ 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
+
36
+ include_examples 'lex', '({.+})',
37
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
+ 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
+ 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
+ 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
+ 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
+ 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
+
44
+ include_examples 'lex', ']',
45
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
+
47
+ include_examples 'lex', ']]',
48
+ 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
+
50
+ include_examples 'lex', ']\[',
51
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
+ 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
+
54
+ include_examples 'lex', '()',
55
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
+ 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
+
58
+ include_examples 'lex', '{abc:.+}}}[^}]]}',
59
+ 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
+ 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
+ 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
+ 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
+ 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
+ 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
+ 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
+ 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
+ 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
+ end
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to parse') do
4
+ it 'raises if if parsing from a Regexp and options are passed' do
5
+ expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
6
+ ArgumentError,
7
+ 'options cannot be supplied unless parsing a String'
8
+ )
9
+ end
10
+
11
+ it 'sets options if parsing from a String' do
12
+ root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13
+
14
+ expect(root.options).to eq(m: true, x: true)
15
+ end
16
+
17
+ it 'allows options to not be supplied when parsing from a Regexp' do
18
+ root = RP.parse(/a+/ix)
19
+
20
+ expect(root.options).to eq(i: true, x: true)
21
+ end
22
+
23
+ it 'has an empty option-hash when parsing from a String and passing no options' do
24
+ root = RP.parse('a+')
25
+
26
+ expect(root.options).to be_empty
27
+ end
28
+ end
@@ -35,6 +35,7 @@ RSpec.describe('Quantifier parsing') do
35
35
  include_examples 'quantifier', /a{4}b/, '{4}', :greedy, :interval, 4, 4
36
36
  include_examples 'quantifier', /a{4}?b/, '{4}?', :reluctant, :interval, 4, 4
37
37
  include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
38
+ include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
38
39
 
39
40
  specify('mode-checking methods') do
40
41
  exp = RP.parse(/a??/).first
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter scanning') do
4
+ include_examples 'scan', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1]
6
+
7
+ include_examples 'scan', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2]
9
+
10
+ include_examples 'scan', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1]
12
+
13
+ include_examples 'scan', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2]
15
+
16
+ include_examples 'scan', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2]
18
+
19
+ include_examples 'scan', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2]
21
+
22
+ include_examples 'scan', '}{+',
23
+ 0 => [:literal, :literal, '}{', 0, 2]
24
+
25
+ include_examples 'scan', '{{var}}',
26
+ 0 => [:literal, :literal, '{{var}}', 0, 7]
27
+
28
+ include_examples 'scan', 'a{1,2',
29
+ 0 => [:literal, :literal, 'a{1,2', 0, 5]
30
+
31
+ include_examples 'scan', '({.+})',
32
+ 0 => [:group, :capture, '(', 0, 1],
33
+ 1 => [:literal, :literal, '{', 1, 2],
34
+ 2 => [:meta, :dot, '.', 2, 3],
35
+ 3 => [:quantifier, :one_or_more, '+', 3, 4],
36
+ 4 => [:literal, :literal, '}', 4, 5],
37
+ 5 => [:group, :close, ')', 5, 6]
38
+
39
+ include_examples 'scan', ']',
40
+ 0 => [:literal, :literal, ']', 0, 1]
41
+
42
+ include_examples 'scan', ']]',
43
+ 0 => [:literal, :literal, ']]', 0, 2]
44
+
45
+ include_examples 'scan', ']\[',
46
+ 0 => [:literal, :literal, ']', 0, 1],
47
+ 1 => [:escape, :set_open, '\[', 1, 3]
48
+
49
+ include_examples 'scan', '()',
50
+ 0 => [:group, :capture, '(', 0, 1],
51
+ 1 => [:group, :close, ')', 1, 2]
52
+ end