regexp_parser 1.7.1 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,9 +74,9 @@ module Regexp::Syntax
74
74
  end
75
75
 
76
76
  def warn_if_future_version(const_name)
77
- return if comparable_version(const_name) < comparable_version('3.0.0')
77
+ return if comparable_version(const_name) < comparable_version('4.0.0')
78
78
 
79
- warn('This library has only been tested up to Ruby 2.x, '\
79
+ warn('This library has only been tested up to Ruby 3.x, '\
80
80
  "but you are running with #{const_get(const_name).inspect}")
81
81
  end
82
82
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.7.1'
3
+ VERSION = '2.0.1'
4
4
  end
5
5
  end
@@ -32,5 +32,5 @@ Gem::Specification.new do |gem|
32
32
 
33
33
  gem.platform = Gem::Platform::RUBY
34
34
 
35
- gem.required_ruby_version = '>= 1.9.1'
35
+ gem.required_ruby_version = '>= 2.0.0'
36
36
  end
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
91
91
  expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
92
92
  expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
93
93
  end
94
+
95
+ specify('#base_length') do
96
+ expect(RP.parse(/(aa)/)[0].base_length).to eq 4
97
+ expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
98
+ end
99
+
100
+ specify('#full_length') do
101
+ expect(RP.parse(/(aa)/)[0].full_length).to eq 4
102
+ expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
103
+ end
94
104
  end
@@ -97,4 +97,20 @@ RSpec.describe('Expression#to_s') do
97
97
 
98
98
  expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
99
99
  end
100
+
101
+ # special case: implicit groups used for chained quantifiers produce no parens
102
+ specify 'chained quantifiers #to_s' do
103
+ pattern = /a+{1}{2}/
104
+ root = RP.parse(pattern)
105
+ expect(root.to_s).to eq 'a+{1}{2}'
106
+ end
107
+
108
+ # regression test for https://github.com/ammar/regexp_parser/issues/74
109
+ specify('non-ascii comment') do
110
+ pattern = '(?x) 😋 # 😋'
111
+ root = RP.parse(pattern)
112
+ expect(root.last).to be_a Regexp::Expression::Comment
113
+ expect(root.last.to_s).to eq '# 😋'
114
+ expect(root.to_s).to eq pattern
115
+ end
100
116
  end
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
10
10
  1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
11
  2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
12
 
13
- # 2 byte wide characters, Arabic
14
- include_examples 'lex', 'ا',
15
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0]
16
-
17
- include_examples 'lex', 'aاbبcت',
18
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
19
-
20
- include_examples 'lex', 'aاbبت?',
21
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
22
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
23
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
24
-
25
- include_examples 'lex', 'aا?bبcت+',
26
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
27
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
28
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
29
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
30
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
31
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
32
-
33
- include_examples 'lex', 'a(اbب+)cت?',
34
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
35
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
36
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
37
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
38
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
39
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
40
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
41
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
42
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
13
+ # 2 byte wide characters
14
+ include_examples 'lex', 'äöü+',
15
+ 0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
16
+ 1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
17
+ 2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
43
18
 
44
19
  # 3 byte wide characters, Japanese
45
20
  include_examples 'lex', 'ab?れます+cd',
46
21
  0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
47
22
  1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
48
23
  2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
49
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
50
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
51
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
52
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0]
24
+ 3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
25
+ 4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
26
+ 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
27
+ 6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
53
28
 
54
29
  # 4 byte wide characters, Osmanya
55
30
  include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
56
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
57
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
58
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
59
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
60
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
61
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
62
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0]
31
+ 0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
32
+ 1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
33
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
34
+ 3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
35
+ 4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
36
+ 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
37
+ 6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
63
38
 
64
39
  include_examples 'lex', 'mu𝄞?si*𝄫c+',
65
40
  0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
66
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
67
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
68
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
69
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
70
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
71
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
72
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
73
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
41
+ 1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
42
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
43
+ 3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
44
+ 4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
45
+ 5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
46
+ 6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
47
+ 7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
48
+ 8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
74
49
 
75
50
  specify('lex single 2 byte char') do
76
51
  tokens = RL.lex("\u0627+")
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
25
25
  include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
26
  include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
27
 
28
- # hex escapes
28
+ # hex escapes
29
29
  include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
30
 
31
31
  # octal escapes
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to parse') do
4
+ it 'raises if if parsing from a Regexp and options are passed' do
5
+ expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
6
+ ArgumentError,
7
+ 'options cannot be supplied unless parsing a String'
8
+ )
9
+ end
10
+
11
+ it 'sets options if parsing from a String' do
12
+ root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13
+
14
+ expect(root.options).to eq(m: true, x: true)
15
+ end
16
+
17
+ it 'allows options to not be supplied when parsing from a Regexp' do
18
+ root = RP.parse(/a+/ix)
19
+
20
+ expect(root.options).to eq(i: true, x: true)
21
+ end
22
+
23
+ it 'has an empty option-hash when parsing from a String and passing no options' do
24
+ root = RP.parse('a+')
25
+
26
+ expect(root.options).to be_empty
27
+ end
28
+ end
@@ -37,6 +37,21 @@ RSpec.describe('Quantifier parsing') do
37
37
  include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
38
38
  include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
39
39
 
40
+ # special case: exps with chained quantifiers are wrapped in implicit passive groups
41
+ include_examples 'parse', /a+{2}{3}/,
42
+ 0 => [
43
+ :group, :passive, Group::Passive, implicit?: true, level: 0,
44
+ quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
45
+ ],
46
+ [0, 0] => [
47
+ :group, :passive, Group::Passive, implicit?: true, level: 1,
48
+ quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
49
+ ],
50
+ [0, 0, 0] => [
51
+ :literal, :literal, Literal, text: 'a', level: 2,
52
+ quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
53
+ ]
54
+
40
55
  specify('mode-checking methods') do
41
56
  exp = RP.parse(/a??/).first
42
57
 
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
17
17
  end
18
18
 
19
19
  specify('parse set range hex') do
20
- root = RP.parse('[\\x00-\\x99]')
20
+ root = RP.parse('[\\x00-\\x22]')
21
21
  set = root[0]
22
22
  range = set[0]
23
23
 
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
26
26
  expect(range.count).to eq 2
27
27
  expect(range.first.to_s).to eq '\\x00'
28
28
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
- expect(range.last.to_s).to eq '\\x99'
29
+ expect(range.last.to_s).to eq '\\x22'
30
30
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match '\\x50'
31
+ expect(set).to match "\x11"
32
32
  end
33
33
 
34
34
  specify('parse set range unicode') do
@@ -11,12 +11,23 @@ RSpec.describe('Escape scanning') do
11
11
  include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
12
  include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
13
 
14
+ # ineffectual literal escapes
15
+ # these cause "Unknown escape" warnings in Ruby for ascii chars,
16
+ # and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
17
+ # In terms of matching, Ruby treats them both like non-escaped literals.
14
18
  include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
19
+ include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
20
+ include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
21
+
22
+ # these incomplete ref/call sequences are treated as literal escapes by Ruby
23
+ include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
24
+ include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
15
25
 
16
26
  include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
27
  include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
18
28
  include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
19
29
 
30
+ include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
20
31
  include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
21
32
  include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
22
33
 
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
39
39
  11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
40
  17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
41
  29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+
43
+ # single line / no trailing newline (c.f. issue #66)
44
+ include_examples 'scan', /a # b/x,
45
+ 0 => [:literal, :literal, 'a', 0, 1],
46
+ 1 => [:free_space, :whitespace, ' ', 1, 2],
47
+ 2 => [:free_space, :comment, "# b", 2, 5]
48
+
49
+ # without spaces (c.f. issue #66)
50
+ include_examples 'scan', /a#b/x,
51
+ 0 => [:literal, :literal, 'a', 0, 1],
52
+ 1 => [:free_space, :comment, "#b", 1, 3]
42
53
  end
43
54
 
44
55
  describe('scan free space inlined') do
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
130
141
  26 => [:literal, :literal, 'i j', 35, 38],
131
142
  27 => [:group, :close, ')', 38, 39]
132
143
  end
144
+
145
+ describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
146
+ include_examples 'scan', /a#bcd/,
147
+ 0 => [:literal, :literal, 'a#bcd', 0, 5]
148
+ include_examples 'scan', /a # bcd/,
149
+ 0 => [:literal, :literal, 'a # bcd', 0, 7]
150
+
151
+ include_examples 'scan', /a#\d/,
152
+ 0 => [:literal, :literal, 'a#', 0, 2],
153
+ 1 => [:type, :digit, '\d', 2, 4]
154
+ include_examples 'scan', /a # \d/,
155
+ 0 => [:literal, :literal, 'a # ', 0, 4],
156
+ 1 => [:type, :digit, '\d', 4, 6]
157
+
158
+ include_examples 'scan', /a#()/,
159
+ 0 => [:literal, :literal, 'a#', 0, 2],
160
+ 1 => [:group, :capture, '(', 2, 3]
161
+ include_examples 'scan', /a # ()/,
162
+ 0 => [:literal, :literal, 'a # ', 0, 4],
163
+ 1 => [:group, :capture, '(', 4, 5]
164
+ end
133
165
  end
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
5
5
  include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
6
  include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
7
 
8
+ # Named groups
9
+ # only names that start with a hyphen or digit (ascii or other) are invalid
8
10
  include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
11
  include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
-
11
12
  include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
13
  include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
14
+ include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
15
+ include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
16
+ include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
17
+ include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
18
+ include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
19
+ include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
20
+ include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
21
+ include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
13
22
 
14
23
  include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
24
  include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
@@ -2,48 +2,38 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe('UTF8 scanning') do
4
4
  # ascii, single byte characters
5
- include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
5
+ include_examples 'scan', 'a',
6
+ 0 => [:literal, :literal, 'a', 0, 1]
6
7
 
7
- include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
- include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
8
+ include_examples 'scan', 'ab+',
9
+ 0 => [:literal, :literal, 'ab', 0, 2],
10
+ 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
11
 
10
- # 2 byte wide characters, Arabic
11
- include_examples 'scan', 'aاbبcت', 0 => [:literal, :literal, 'aاbبcت', 0, 9]
12
-
13
- include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
14
- include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
-
16
- include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
17
- include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
- include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
19
- include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
-
21
- include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
22
- include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
23
- include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
24
- include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
- include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
26
- include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
27
- include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
12
+ # 2 byte wide characters
13
+ include_examples 'scan', 'äöü',
14
+ 0 => [:literal, :literal, 'äöü', 0, 3]
28
15
 
29
16
  # 3 byte wide characters, Japanese
30
- include_examples 'scan', 'ab?れます+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
- include_examples 'scan', 'ab?れます+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
- include_examples 'scan', 'ab?れます+cd', 2 => [:literal, :literal, 'れます', 3, 12]
33
- include_examples 'scan', 'ab?れます+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
- include_examples 'scan', 'ab?れます+cd', 4 => [:literal, :literal, 'cd', 13, 15]
17
+ include_examples 'scan', 'ab?れます+cd',
18
+ 0 => [:literal, :literal, 'ab', 0, 2],
19
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
20
+ 2 => [:literal, :literal, 'れます', 3, 6],
21
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
22
+ 4 => [:literal, :literal, 'cd', 7, 9]
35
23
 
36
24
  # 4 byte wide characters, Osmanya
37
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0 => [:literal, :literal, '𐒀𐒁', 0, 8]
38
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2 => [:literal, :literal, '𐒂ab', 9, 15]
40
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4 => [:literal, :literal, '𐒃', 16, 20]
42
-
43
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 0 => [:literal, :literal, 'mu𝄞', 0, 6]
44
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 2 => [:literal, :literal, 'si', 7, 9]
46
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 4 => [:literal, :literal, '𝄫c', 10, 15]
48
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
25
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
26
+ 0 => [:literal, :literal, '𐒀𐒁', 0, 2],
27
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
28
+ 2 => [:literal, :literal, '𐒂ab', 3, 6],
29
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
30
+ 4 => [:literal, :literal, '𐒃', 7, 8]
31
+
32
+ include_examples 'scan', 'mu𝄞?si*𝄫c+',
33
+ 0 => [:literal, :literal, 'mu𝄞', 0, 3],
34
+ 1 => [:quantifier, :zero_or_one, '?', 3, 4],
35
+ 2 => [:literal, :literal, 'si', 4, 6],
36
+ 3 => [:quantifier, :zero_or_more, '*', 6, 7],
37
+ 4 => [:literal, :literal, '𝄫c', 7, 9],
38
+ 5 => [:quantifier, :one_or_more, '+', 9, 10]
49
39
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to scan') do
4
+ def expect_type_tokens(tokens, type_tokens)
5
+ expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
6
+ end
7
+
8
+ it 'raises if if scanning from a Regexp and options are passed' do
9
+ expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
10
+ ArgumentError,
11
+ 'options cannot be supplied unless scanning a String'
12
+ )
13
+ end
14
+
15
+ it 'sets free_spacing based on options if scanning from a String' do
16
+ expect_type_tokens(
17
+ RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
18
+ [
19
+ %i[literal literal],
20
+ %i[quantifier one_or_more],
21
+ %i[free_space comment]
22
+ ]
23
+ )
24
+ end
25
+
26
+ it 'does not set free_spacing if scanning from a String and passing no options' do
27
+ expect_type_tokens(
28
+ RS.scan('a+#c'),
29
+ [
30
+ %i[literal literal],
31
+ %i[quantifier one_or_more],
32
+ %i[literal literal]
33
+ ]
34
+ )
35
+ end
36
+ end
@@ -1,20 +1,25 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Quantifier scanning') do
4
- include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
5
- include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
6
- include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
4
+ include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
5
+ include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
6
+ include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
7
7
 
8
- include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
9
- include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
10
- include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
8
+ include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
9
+ include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
10
+ include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
11
11
 
12
- include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
13
- include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
14
- include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
12
+ include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
13
+ include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
14
+ include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
15
15
 
16
- include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
17
- include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
18
- include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
19
- include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
16
+ include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
17
+ include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
18
+ include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
19
+ include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
20
+
21
+ # special case: chained quantifiers
22
+ include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier, :one_or_more, '+', 1, 2]
23
+ include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier, :interval, '{2}', 2, 5]
24
+ include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier, :interval, '{3}', 5, 8]
20
25
  end