regexp_parser 1.7.1 โ†’ 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -74,9 +74,9 @@ module Regexp::Syntax
74
74
  end
75
75
 
76
76
  def warn_if_future_version(const_name)
77
- return if comparable_version(const_name) < comparable_version('3.0.0')
77
+ return if comparable_version(const_name) < comparable_version('4.0.0')
78
78
 
79
- warn('This library has only been tested up to Ruby 2.x, '\
79
+ warn('This library has only been tested up to Ruby 3.x, '\
80
80
  "but you are running with #{const_get(const_name).inspect}")
81
81
  end
82
82
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.7.1'
3
+ VERSION = '2.0.1'
4
4
  end
5
5
  end
@@ -32,5 +32,5 @@ Gem::Specification.new do |gem|
32
32
 
33
33
  gem.platform = Gem::Platform::RUBY
34
34
 
35
- gem.required_ruby_version = '>= 1.9.1'
35
+ gem.required_ruby_version = '>= 2.0.0'
36
36
  end
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
91
91
  expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
92
92
  expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
93
93
  end
94
+
95
+ specify('#base_length') do
96
+ expect(RP.parse(/(aa)/)[0].base_length).to eq 4
97
+ expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
98
+ end
99
+
100
+ specify('#full_length') do
101
+ expect(RP.parse(/(aa)/)[0].full_length).to eq 4
102
+ expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
103
+ end
94
104
  end
@@ -97,4 +97,20 @@ RSpec.describe('Expression#to_s') do
97
97
 
98
98
  expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
99
99
  end
100
+
101
+ # special case: implicit groups used for chained quantifiers produce no parens
102
+ specify 'chained quantifiers #to_s' do
103
+ pattern = /a+{1}{2}/
104
+ root = RP.parse(pattern)
105
+ expect(root.to_s).to eq 'a+{1}{2}'
106
+ end
107
+
108
+ # regression test for https://github.com/ammar/regexp_parser/issues/74
109
+ specify('non-ascii comment') do
110
+ pattern = '(?x) ๐Ÿ˜‹ # ๐Ÿ˜‹'
111
+ root = RP.parse(pattern)
112
+ expect(root.last).to be_a Regexp::Expression::Comment
113
+ expect(root.last.to_s).to eq '# ๐Ÿ˜‹'
114
+ expect(root.to_s).to eq pattern
115
+ end
100
116
  end
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
10
10
  1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
11
  2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
12
 
13
- # 2 byte wide characters, Arabic
14
- include_examples 'lex', 'ุง',
15
- 0 => [:literal, :literal, 'ุง', 0, 2, 0, 0, 0]
16
-
17
- include_examples 'lex', 'aุงbุจcุช',
18
- 0 => [:literal, :literal, 'aุงbุจcุช', 0, 9, 0, 0, 0]
19
-
20
- include_examples 'lex', 'aุงbุจุช?',
21
- 0 => [:literal, :literal, 'aุงbุจ', 0, 6, 0, 0, 0],
22
- 1 => [:literal, :literal, 'ุช', 6, 8, 0, 0, 0],
23
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
24
-
25
- include_examples 'lex', 'aุง?bุจcุช+',
26
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
27
- 1 => [:literal, :literal, 'ุง', 1, 3, 0, 0, 0],
28
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
29
- 3 => [:literal, :literal, 'bุจc', 4, 8, 0, 0, 0],
30
- 4 => [:literal, :literal, 'ุช', 8, 10, 0, 0, 0],
31
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
32
-
33
- include_examples 'lex', 'a(ุงbุจ+)cุช?',
34
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
35
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
36
- 2 => [:literal, :literal, 'ุงb', 2, 5, 1, 0, 0],
37
- 3 => [:literal, :literal, 'ุจ', 5, 7, 1, 0, 0],
38
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
39
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
40
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
41
- 7 => [:literal, :literal, 'ุช', 10, 12, 0, 0, 0],
42
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
13
+ # 2 byte wide characters
14
+ include_examples 'lex', 'รครถรผ+',
15
+ 0 => [:literal, :literal, 'รครถ', 0, 2, 0, 0, 0],
16
+ 1 => [:literal, :literal, 'รผ', 2, 3, 0, 0, 0],
17
+ 2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
43
18
 
44
19
  # 3 byte wide characters, Japanese
45
20
  include_examples 'lex', 'ab?ใ‚Œใพใ™+cd',
46
21
  0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
47
22
  1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
48
23
  2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
49
- 3 => [:literal, :literal, 'ใ‚Œใพ', 3, 9, 0, 0, 0],
50
- 4 => [:literal, :literal, 'ใ™', 9, 12, 0, 0, 0],
51
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
52
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0]
24
+ 3 => [:literal, :literal, 'ใ‚Œใพ', 3, 5, 0, 0, 0],
25
+ 4 => [:literal, :literal, 'ใ™', 5, 6, 0, 0, 0],
26
+ 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
27
+ 6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
53
28
 
54
29
  # 4 byte wide characters, Osmanya
55
30
  include_examples 'lex', '๐’€๐’?๐’‚ab+๐’ƒ',
56
- 0 => [:literal, :literal, '๐’€', 0, 4, 0, 0, 0],
57
- 1 => [:literal, :literal, '๐’', 4, 8, 0, 0, 0],
58
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
59
- 3 => [:literal, :literal, '๐’‚a', 9, 14, 0, 0, 0],
60
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
61
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
62
- 6 => [:literal, :literal, '๐’ƒ', 16, 20, 0, 0, 0]
31
+ 0 => [:literal, :literal, '๐’€', 0, 1, 0, 0, 0],
32
+ 1 => [:literal, :literal, '๐’', 1, 2, 0, 0, 0],
33
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
34
+ 3 => [:literal, :literal, '๐’‚a', 3, 5, 0, 0, 0],
35
+ 4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
36
+ 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
37
+ 6 => [:literal, :literal, '๐’ƒ', 7, 8, 0, 0, 0]
63
38
 
64
39
  include_examples 'lex', 'mu๐„ž?si*๐„ซc+',
65
40
  0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
66
- 1 => [:literal, :literal, '๐„ž', 2, 6, 0, 0, 0],
67
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
68
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
69
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
70
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
71
- 6 => [:literal, :literal, '๐„ซ', 10, 14, 0, 0, 0],
72
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
73
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
41
+ 1 => [:literal, :literal, '๐„ž', 2, 3, 0, 0, 0],
42
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
43
+ 3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
44
+ 4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
45
+ 5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
46
+ 6 => [:literal, :literal, '๐„ซ', 7, 8, 0, 0, 0],
47
+ 7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
48
+ 8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
74
49
 
75
50
  specify('lex single 2 byte char') do
76
51
  tokens = RL.lex("\u0627+")
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
25
25
  include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
26
  include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
27
 
28
- # hex escapes
28
+ # hex escapes
29
29
  include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
30
 
31
31
  # octal escapes
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to parse') do
4
+ it 'raises if if parsing from a Regexp and options are passed' do
5
+ expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
6
+ ArgumentError,
7
+ 'options cannot be supplied unless parsing a String'
8
+ )
9
+ end
10
+
11
+ it 'sets options if parsing from a String' do
12
+ root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13
+
14
+ expect(root.options).to eq(m: true, x: true)
15
+ end
16
+
17
+ it 'allows options to not be supplied when parsing from a Regexp' do
18
+ root = RP.parse(/a+/ix)
19
+
20
+ expect(root.options).to eq(i: true, x: true)
21
+ end
22
+
23
+ it 'has an empty option-hash when parsing from a String and passing no options' do
24
+ root = RP.parse('a+')
25
+
26
+ expect(root.options).to be_empty
27
+ end
28
+ end
@@ -37,6 +37,21 @@ RSpec.describe('Quantifier parsing') do
37
37
  include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
38
38
  include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
39
39
 
40
+ # special case: exps with chained quantifiers are wrapped in implicit passive groups
41
+ include_examples 'parse', /a+{2}{3}/,
42
+ 0 => [
43
+ :group, :passive, Group::Passive, implicit?: true, level: 0,
44
+ quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
45
+ ],
46
+ [0, 0] => [
47
+ :group, :passive, Group::Passive, implicit?: true, level: 1,
48
+ quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
49
+ ],
50
+ [0, 0, 0] => [
51
+ :literal, :literal, Literal, text: 'a', level: 2,
52
+ quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
53
+ ]
54
+
40
55
  specify('mode-checking methods') do
41
56
  exp = RP.parse(/a??/).first
42
57
 
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
17
17
  end
18
18
 
19
19
  specify('parse set range hex') do
20
- root = RP.parse('[\\x00-\\x99]')
20
+ root = RP.parse('[\\x00-\\x22]')
21
21
  set = root[0]
22
22
  range = set[0]
23
23
 
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
26
26
  expect(range.count).to eq 2
27
27
  expect(range.first.to_s).to eq '\\x00'
28
28
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
- expect(range.last.to_s).to eq '\\x99'
29
+ expect(range.last.to_s).to eq '\\x22'
30
30
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match '\\x50'
31
+ expect(set).to match "\x11"
32
32
  end
33
33
 
34
34
  specify('parse set range unicode') do
@@ -11,12 +11,23 @@ RSpec.describe('Escape scanning') do
11
11
  include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
12
  include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
13
 
14
+ # ineffectual literal escapes
15
+ # these cause "Unknown escape" warnings in Ruby for ascii chars,
16
+ # and simply drop the backslash for non-ascii chars (/\รผ/.inspect == '/รผ/').
17
+ # In terms of matching, Ruby treats them both like non-escaped literals.
14
18
  include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
19
+ include_examples 'scan', 'a\รผc', 1 => [:escape, :literal, '\รผ', 1, 3]
20
+ include_examples 'scan', 'a\๐Ÿ˜‹c', 1 => [:escape, :literal, '\๐Ÿ˜‹', 1, 3]
21
+
22
+ # these incomplete ref/call sequences are treated as literal escapes by Ruby
23
+ include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
24
+ include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
15
25
 
16
26
  include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
27
  include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
18
28
  include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
19
29
 
30
+ include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
20
31
  include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
21
32
  include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
22
33
 
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
39
39
  11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
40
  17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
41
  29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+
43
+ # single line / no trailing newline (c.f. issue #66)
44
+ include_examples 'scan', /a # b/x,
45
+ 0 => [:literal, :literal, 'a', 0, 1],
46
+ 1 => [:free_space, :whitespace, ' ', 1, 2],
47
+ 2 => [:free_space, :comment, "# b", 2, 5]
48
+
49
+ # without spaces (c.f. issue #66)
50
+ include_examples 'scan', /a#b/x,
51
+ 0 => [:literal, :literal, 'a', 0, 1],
52
+ 1 => [:free_space, :comment, "#b", 1, 3]
42
53
  end
43
54
 
44
55
  describe('scan free space inlined') do
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
130
141
  26 => [:literal, :literal, 'i j', 35, 38],
131
142
  27 => [:group, :close, ')', 38, 39]
132
143
  end
144
+
145
+ describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
146
+ include_examples 'scan', /a#bcd/,
147
+ 0 => [:literal, :literal, 'a#bcd', 0, 5]
148
+ include_examples 'scan', /a # bcd/,
149
+ 0 => [:literal, :literal, 'a # bcd', 0, 7]
150
+
151
+ include_examples 'scan', /a#\d/,
152
+ 0 => [:literal, :literal, 'a#', 0, 2],
153
+ 1 => [:type, :digit, '\d', 2, 4]
154
+ include_examples 'scan', /a # \d/,
155
+ 0 => [:literal, :literal, 'a # ', 0, 4],
156
+ 1 => [:type, :digit, '\d', 4, 6]
157
+
158
+ include_examples 'scan', /a#()/,
159
+ 0 => [:literal, :literal, 'a#', 0, 2],
160
+ 1 => [:group, :capture, '(', 2, 3]
161
+ include_examples 'scan', /a # ()/,
162
+ 0 => [:literal, :literal, 'a # ', 0, 4],
163
+ 1 => [:group, :capture, '(', 4, 5]
164
+ end
133
165
  end
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
5
5
  include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
6
  include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
7
 
8
+ # Named groups
9
+ # only names that start with a hyphen or digit (ascii or other) are invalid
8
10
  include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
11
  include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
-
11
12
  include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
13
  include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
14
+ include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
15
+ include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
16
+ include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
17
+ include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
18
+ include_examples 'scan', '(?<รผรผuuรผรผ>abc)', 0 => [:group, :named_ab, '(?<รผรผuuรผรผ>', 0,10]
19
+ include_examples 'scan', "(?'รผรผuuรผรผ'abc)", 0 => [:group, :named_sq, "(?'รผรผuuรผรผ'", 0,10]
20
+ include_examples 'scan', "(?<๐Ÿ˜‹1234๐Ÿ˜‹>abc)", 0 => [:group, :named_ab, "(?<๐Ÿ˜‹1234๐Ÿ˜‹>", 0,10]
21
+ include_examples 'scan', "(?'๐Ÿ˜‹1234๐Ÿ˜‹'abc)", 0 => [:group, :named_sq, "(?'๐Ÿ˜‹1234๐Ÿ˜‹'", 0,10]
13
22
 
14
23
  include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
24
  include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
@@ -2,48 +2,38 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe('UTF8 scanning') do
4
4
  # ascii, single byte characters
5
- include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
5
+ include_examples 'scan', 'a',
6
+ 0 => [:literal, :literal, 'a', 0, 1]
6
7
 
7
- include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
- include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
8
+ include_examples 'scan', 'ab+',
9
+ 0 => [:literal, :literal, 'ab', 0, 2],
10
+ 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
11
 
10
- # 2 byte wide characters, Arabic
11
- include_examples 'scan', 'aุงbุจcุช', 0 => [:literal, :literal, 'aุงbุจcุช', 0, 9]
12
-
13
- include_examples 'scan', 'aุงbุจุช?', 0 => [:literal, :literal, 'aุงbุจุช', 0, 8]
14
- include_examples 'scan', 'aุงbุจุช?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
-
16
- include_examples 'scan', 'aุง?bุจcุช+', 0 => [:literal, :literal, 'aุง', 0, 3]
17
- include_examples 'scan', 'aุง?bุจcุช+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
- include_examples 'scan', 'aุง?bุจcุช+', 2 => [:literal, :literal, 'bุจcุช', 4, 10]
19
- include_examples 'scan', 'aุง?bุจcุช+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
-
21
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 0 => [:literal, :literal, 'a', 0, 1]
22
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 1 => [:group, :capture, '(', 1, 2]
23
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 2 => [:literal, :literal, 'ุงbุจ', 2, 7]
24
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 4 => [:group, :close, ')', 8, 9]
26
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 5 => [:literal, :literal, 'cุช', 9, 12]
27
- include_examples 'scan', 'a(ุงbุจ+)cุช?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
12
+ # 2 byte wide characters
13
+ include_examples 'scan', 'รครถรผ',
14
+ 0 => [:literal, :literal, 'รครถรผ', 0, 3]
28
15
 
29
16
  # 3 byte wide characters, Japanese
30
- include_examples 'scan', 'ab?ใ‚Œใพใ™+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
- include_examples 'scan', 'ab?ใ‚Œใพใ™+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
- include_examples 'scan', 'ab?ใ‚Œใพใ™+cd', 2 => [:literal, :literal, 'ใ‚Œใพใ™', 3, 12]
33
- include_examples 'scan', 'ab?ใ‚Œใพใ™+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
- include_examples 'scan', 'ab?ใ‚Œใพใ™+cd', 4 => [:literal, :literal, 'cd', 13, 15]
17
+ include_examples 'scan', 'ab?ใ‚Œใพใ™+cd',
18
+ 0 => [:literal, :literal, 'ab', 0, 2],
19
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
20
+ 2 => [:literal, :literal, 'ใ‚Œใพใ™', 3, 6],
21
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
22
+ 4 => [:literal, :literal, 'cd', 7, 9]
35
23
 
36
24
  # 4 byte wide characters, Osmanya
37
- include_examples 'scan', '๐’€๐’?๐’‚ab+๐’ƒ', 0 => [:literal, :literal, '๐’€๐’', 0, 8]
38
- include_examples 'scan', '๐’€๐’?๐’‚ab+๐’ƒ', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
- include_examples 'scan', '๐’€๐’?๐’‚ab+๐’ƒ', 2 => [:literal, :literal, '๐’‚ab', 9, 15]
40
- include_examples 'scan', '๐’€๐’?๐’‚ab+๐’ƒ', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
- include_examples 'scan', '๐’€๐’?๐’‚ab+๐’ƒ', 4 => [:literal, :literal, '๐’ƒ', 16, 20]
42
-
43
- include_examples 'scan', 'mu๐„ž?si*๐„ซc+', 0 => [:literal, :literal, 'mu๐„ž', 0, 6]
44
- include_examples 'scan', 'mu๐„ž?si*๐„ซc+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
- include_examples 'scan', 'mu๐„ž?si*๐„ซc+', 2 => [:literal, :literal, 'si', 7, 9]
46
- include_examples 'scan', 'mu๐„ž?si*๐„ซc+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
- include_examples 'scan', 'mu๐„ž?si*๐„ซc+', 4 => [:literal, :literal, '๐„ซc', 10, 15]
48
- include_examples 'scan', 'mu๐„ž?si*๐„ซc+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
25
+ include_examples 'scan', '๐’€๐’?๐’‚ab+๐’ƒ',
26
+ 0 => [:literal, :literal, '๐’€๐’', 0, 2],
27
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
28
+ 2 => [:literal, :literal, '๐’‚ab', 3, 6],
29
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
30
+ 4 => [:literal, :literal, '๐’ƒ', 7, 8]
31
+
32
+ include_examples 'scan', 'mu๐„ž?si*๐„ซc+',
33
+ 0 => [:literal, :literal, 'mu๐„ž', 0, 3],
34
+ 1 => [:quantifier, :zero_or_one, '?', 3, 4],
35
+ 2 => [:literal, :literal, 'si', 4, 6],
36
+ 3 => [:quantifier, :zero_or_more, '*', 6, 7],
37
+ 4 => [:literal, :literal, '๐„ซc', 7, 9],
38
+ 5 => [:quantifier, :one_or_more, '+', 9, 10]
49
39
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to scan') do
4
+ def expect_type_tokens(tokens, type_tokens)
5
+ expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
6
+ end
7
+
8
+ it 'raises if if scanning from a Regexp and options are passed' do
9
+ expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
10
+ ArgumentError,
11
+ 'options cannot be supplied unless scanning a String'
12
+ )
13
+ end
14
+
15
+ it 'sets free_spacing based on options if scanning from a String' do
16
+ expect_type_tokens(
17
+ RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
18
+ [
19
+ %i[literal literal],
20
+ %i[quantifier one_or_more],
21
+ %i[free_space comment]
22
+ ]
23
+ )
24
+ end
25
+
26
+ it 'does not set free_spacing if scanning from a String and passing no options' do
27
+ expect_type_tokens(
28
+ RS.scan('a+#c'),
29
+ [
30
+ %i[literal literal],
31
+ %i[quantifier one_or_more],
32
+ %i[literal literal]
33
+ ]
34
+ )
35
+ end
36
+ end
@@ -1,20 +1,25 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Quantifier scanning') do
4
- include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
5
- include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
6
- include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
4
+ include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
5
+ include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
6
+ include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
7
7
 
8
- include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
9
- include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
10
- include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
8
+ include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
9
+ include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
10
+ include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
11
11
 
12
- include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
13
- include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
14
- include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
12
+ include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
13
+ include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
14
+ include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
15
15
 
16
- include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
17
- include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
18
- include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
19
- include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
16
+ include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
17
+ include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
18
+ include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
19
+ include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
20
+
21
+ # special case: chained quantifiers
22
+ include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier, :one_or_more, '+', 1, 2]
23
+ include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier, :interval, '{2}', 2, 5]
24
+ include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier, :interval, '{3}', 5, 8]
20
25
  end