regexp_parser 1.7.1 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +138 -0
  3. data/Gemfile +6 -1
  4. data/README.md +23 -11
  5. data/Rakefile +8 -8
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression.rb +13 -21
  8. data/lib/regexp_parser/expression/classes/backref.rb +5 -0
  9. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  10. data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
  11. data/lib/regexp_parser/expression/classes/group.rb +28 -3
  12. data/lib/regexp_parser/expression/classes/property.rb +1 -1
  13. data/lib/regexp_parser/expression/classes/root.rb +4 -16
  14. data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
  15. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  16. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  17. data/lib/regexp_parser/expression/quantifier.rb +10 -1
  18. data/lib/regexp_parser/expression/sequence.rb +3 -19
  19. data/lib/regexp_parser/expression/subexpression.rb +1 -1
  20. data/lib/regexp_parser/lexer.rb +6 -6
  21. data/lib/regexp_parser/parser.rb +325 -344
  22. data/lib/regexp_parser/scanner.rb +1320 -1385
  23. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  24. data/lib/regexp_parser/scanner/property.rl +2 -2
  25. data/lib/regexp_parser/scanner/scanner.rl +231 -253
  26. data/lib/regexp_parser/syntax.rb +8 -6
  27. data/lib/regexp_parser/syntax/any.rb +3 -3
  28. data/lib/regexp_parser/syntax/base.rb +1 -1
  29. data/lib/regexp_parser/syntax/version_lookup.rb +4 -4
  30. data/lib/regexp_parser/version.rb +1 -1
  31. data/regexp_parser.gemspec +1 -1
  32. data/spec/expression/base_spec.rb +10 -0
  33. data/spec/expression/clone_spec.rb +36 -4
  34. data/spec/expression/free_space_spec.rb +2 -2
  35. data/spec/expression/methods/match_length_spec.rb +2 -2
  36. data/spec/expression/subexpression_spec.rb +1 -1
  37. data/spec/expression/to_s_spec.rb +39 -31
  38. data/spec/lexer/literals_spec.rb +24 -49
  39. data/spec/lexer/refcalls_spec.rb +5 -0
  40. data/spec/parser/all_spec.rb +2 -2
  41. data/spec/parser/errors_spec.rb +1 -1
  42. data/spec/parser/escapes_spec.rb +1 -1
  43. data/spec/parser/options_spec.rb +28 -0
  44. data/spec/parser/quantifiers_spec.rb +16 -0
  45. data/spec/parser/refcalls_spec.rb +5 -0
  46. data/spec/parser/set/ranges_spec.rb +3 -3
  47. data/spec/scanner/escapes_spec.rb +12 -1
  48. data/spec/scanner/free_space_spec.rb +32 -0
  49. data/spec/scanner/groups_spec.rb +10 -1
  50. data/spec/scanner/literals_spec.rb +28 -38
  51. data/spec/scanner/options_spec.rb +36 -0
  52. data/spec/scanner/quantifiers_spec.rb +18 -13
  53. data/spec/scanner/refcalls_spec.rb +19 -0
  54. data/spec/scanner/sets_spec.rb +65 -16
  55. data/spec/spec_helper.rb +1 -0
  56. metadata +61 -60
  57. data/spec/expression/root_spec.rb +0 -9
  58. data/spec/expression/sequence_spec.rb +0 -9
@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
9
9
  .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
10
  end
11
11
 
12
- RSpec.shared_examples 'UnknownTokenError' do |type, token|
12
+ RSpec.shared_examples 'UnknownTokenError' do |type|
13
13
  it "raises for unkown tokens of type #{type}" do
14
14
  expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
15
  .to raise_error(Regexp::Parser::UnknownTokenError)
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
25
25
  include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
26
  include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
27
 
28
- # hex escapes
28
+ # hex escapes
29
29
  include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
30
 
31
31
  # octal escapes
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to parse') do
4
+ it 'raises if if parsing from a Regexp and options are passed' do
5
+ expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
6
+ ArgumentError,
7
+ 'options cannot be supplied unless parsing a String'
8
+ )
9
+ end
10
+
11
+ it 'sets options if parsing from a String' do
12
+ root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13
+
14
+ expect(root.options).to eq(m: true, x: true)
15
+ end
16
+
17
+ it 'allows options to not be supplied when parsing from a Regexp' do
18
+ root = RP.parse(/a+/ix)
19
+
20
+ expect(root.options).to eq(i: true, x: true)
21
+ end
22
+
23
+ it 'has an empty option-hash when parsing from a String and passing no options' do
24
+ root = RP.parse('a+')
25
+
26
+ expect(root.options).to be_empty
27
+ end
28
+ end
@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
11
11
  expect(exp.quantifier.min).to eq min
12
12
  expect(exp.quantifier.max).to eq max
13
13
  expect(exp.quantifier.mode).to eq mode
14
+ expect(exp.quantifier.text).to eq text
14
15
  end
15
16
  end
16
17
 
@@ -37,6 +38,21 @@ RSpec.describe('Quantifier parsing') do
37
38
  include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
38
39
  include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
39
40
 
41
+ # special case: exps with chained quantifiers are wrapped in implicit passive groups
42
+ include_examples 'parse', /a+{2}{3}/,
43
+ 0 => [
44
+ :group, :passive, Group::Passive, implicit?: true, level: 0,
45
+ quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
46
+ ],
47
+ [0, 0] => [
48
+ :group, :passive, Group::Passive, implicit?: true, level: 1,
49
+ quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
50
+ ],
51
+ [0, 0, 0] => [
52
+ :literal, :literal, Literal, text: 'a', level: 2,
53
+ quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
54
+ ]
55
+
40
56
  specify('mode-checking methods') do
41
57
  exp = RP.parse(/a??/).first
42
58
 
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
29
29
  include_examples 'parse', /(abc)\g'1'/,
30
30
  1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
31
31
 
32
+ include_examples 'parse', '\g<0>',
33
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
34
+ include_examples 'parse', "\\g'0'",
35
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
36
+
32
37
  include_examples 'parse', /(abc)\g<-1>/,
33
38
  1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
34
39
  include_examples 'parse', /(abc)\g'-1'/,
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
17
17
  end
18
18
 
19
19
  specify('parse set range hex') do
20
- root = RP.parse('[\\x00-\\x99]')
20
+ root = RP.parse('[\\x00-\\x22]')
21
21
  set = root[0]
22
22
  range = set[0]
23
23
 
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
26
26
  expect(range.count).to eq 2
27
27
  expect(range.first.to_s).to eq '\\x00'
28
28
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
- expect(range.last.to_s).to eq '\\x99'
29
+ expect(range.last.to_s).to eq '\\x22'
30
30
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match '\\x50'
31
+ expect(set).to match "\x11"
32
32
  end
33
33
 
34
34
  specify('parse set range unicode') do
@@ -4,19 +4,30 @@ RSpec.describe('Escape scanning') do
4
4
  include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
5
 
6
6
  # not an escape outside a character set
7
- include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
8
 
9
9
  include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
10
  include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
11
11
  include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
12
  include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
13
 
14
+ # ineffectual literal escapes
15
+ # these cause "Unknown escape" warnings in Ruby for ascii chars,
16
+ # and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
17
+ # In terms of matching, Ruby treats them both like non-escaped literals.
14
18
  include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
19
+ include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
20
+ include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
21
+
22
+ # these incomplete ref/call sequences are treated as literal escapes by Ruby
23
+ include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
24
+ include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
15
25
 
16
26
  include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
27
  include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
18
28
  include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
19
29
 
30
+ include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
20
31
  include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
21
32
  include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
22
33
 
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
39
39
  11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
40
  17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
41
  29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+
43
+ # single line / no trailing newline (c.f. issue #66)
44
+ include_examples 'scan', /a # b/x,
45
+ 0 => [:literal, :literal, 'a', 0, 1],
46
+ 1 => [:free_space, :whitespace, ' ', 1, 2],
47
+ 2 => [:free_space, :comment, "# b", 2, 5]
48
+
49
+ # without spaces (c.f. issue #66)
50
+ include_examples 'scan', /a#b/x,
51
+ 0 => [:literal, :literal, 'a', 0, 1],
52
+ 1 => [:free_space, :comment, "#b", 1, 3]
42
53
  end
43
54
 
44
55
  describe('scan free space inlined') do
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
130
141
  26 => [:literal, :literal, 'i j', 35, 38],
131
142
  27 => [:group, :close, ')', 38, 39]
132
143
  end
144
+
145
+ describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
146
+ include_examples 'scan', /a#bcd/,
147
+ 0 => [:literal, :literal, 'a#bcd', 0, 5]
148
+ include_examples 'scan', /a # bcd/,
149
+ 0 => [:literal, :literal, 'a # bcd', 0, 7]
150
+
151
+ include_examples 'scan', /a#\d/,
152
+ 0 => [:literal, :literal, 'a#', 0, 2],
153
+ 1 => [:type, :digit, '\d', 2, 4]
154
+ include_examples 'scan', /a # \d/,
155
+ 0 => [:literal, :literal, 'a # ', 0, 4],
156
+ 1 => [:type, :digit, '\d', 4, 6]
157
+
158
+ include_examples 'scan', /a#()/,
159
+ 0 => [:literal, :literal, 'a#', 0, 2],
160
+ 1 => [:group, :capture, '(', 2, 3]
161
+ include_examples 'scan', /a # ()/,
162
+ 0 => [:literal, :literal, 'a # ', 0, 4],
163
+ 1 => [:group, :capture, '(', 4, 5]
164
+ end
133
165
  end
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
5
5
  include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
6
  include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
7
 
8
+ # Named groups
9
+ # only names that start with a hyphen or digit (ascii or other) are invalid
8
10
  include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
11
  include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
-
11
12
  include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
13
  include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
14
+ include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
15
+ include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
16
+ include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
17
+ include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
18
+ include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
19
+ include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
20
+ include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
21
+ include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
13
22
 
14
23
  include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
24
  include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
@@ -2,48 +2,38 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe('UTF8 scanning') do
4
4
  # ascii, single byte characters
5
- include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
5
+ include_examples 'scan', 'a',
6
+ 0 => [:literal, :literal, 'a', 0, 1]
6
7
 
7
- include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
- include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
8
+ include_examples 'scan', 'ab+',
9
+ 0 => [:literal, :literal, 'ab', 0, 2],
10
+ 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
11
 
10
- # 2 byte wide characters, Arabic
11
- include_examples 'scan', 'aاbبcت', 0 => [:literal, :literal, 'aاbبcت', 0, 9]
12
-
13
- include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
14
- include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
-
16
- include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
17
- include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
- include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
19
- include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
-
21
- include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
22
- include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
23
- include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
24
- include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
- include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
26
- include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
27
- include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
12
+ # 2 byte wide characters
13
+ include_examples 'scan', 'äöü',
14
+ 0 => [:literal, :literal, 'äöü', 0, 3]
28
15
 
29
16
  # 3 byte wide characters, Japanese
30
- include_examples 'scan', 'ab?れます+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
- include_examples 'scan', 'ab?れます+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
- include_examples 'scan', 'ab?れます+cd', 2 => [:literal, :literal, 'れます', 3, 12]
33
- include_examples 'scan', 'ab?れます+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
- include_examples 'scan', 'ab?れます+cd', 4 => [:literal, :literal, 'cd', 13, 15]
17
+ include_examples 'scan', 'ab?れます+cd',
18
+ 0 => [:literal, :literal, 'ab', 0, 2],
19
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
20
+ 2 => [:literal, :literal, 'れます', 3, 6],
21
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
22
+ 4 => [:literal, :literal, 'cd', 7, 9]
35
23
 
36
24
  # 4 byte wide characters, Osmanya
37
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0 => [:literal, :literal, '𐒀𐒁', 0, 8]
38
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2 => [:literal, :literal, '𐒂ab', 9, 15]
40
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4 => [:literal, :literal, '𐒃', 16, 20]
42
-
43
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 0 => [:literal, :literal, 'mu𝄞', 0, 6]
44
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 2 => [:literal, :literal, 'si', 7, 9]
46
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 4 => [:literal, :literal, '𝄫c', 10, 15]
48
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
25
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
26
+ 0 => [:literal, :literal, '𐒀𐒁', 0, 2],
27
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
28
+ 2 => [:literal, :literal, '𐒂ab', 3, 6],
29
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
30
+ 4 => [:literal, :literal, '𐒃', 7, 8]
31
+
32
+ include_examples 'scan', 'mu𝄞?si*𝄫c+',
33
+ 0 => [:literal, :literal, 'mu𝄞', 0, 3],
34
+ 1 => [:quantifier, :zero_or_one, '?', 3, 4],
35
+ 2 => [:literal, :literal, 'si', 4, 6],
36
+ 3 => [:quantifier, :zero_or_more, '*', 6, 7],
37
+ 4 => [:literal, :literal, '𝄫c', 7, 9],
38
+ 5 => [:quantifier, :one_or_more, '+', 9, 10]
49
39
  end
@@ -0,0 +1,36 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to scan') do
4
+ def expect_type_tokens(tokens, type_tokens)
5
+ expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
6
+ end
7
+
8
+ it 'raises if if scanning from a Regexp and options are passed' do
9
+ expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
10
+ ArgumentError,
11
+ 'options cannot be supplied unless scanning a String'
12
+ )
13
+ end
14
+
15
+ it 'sets free_spacing based on options if scanning from a String' do
16
+ expect_type_tokens(
17
+ RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
18
+ [
19
+ %i[literal literal],
20
+ %i[quantifier one_or_more],
21
+ %i[free_space comment]
22
+ ]
23
+ )
24
+ end
25
+
26
+ it 'does not set free_spacing if scanning from a String and passing no options' do
27
+ expect_type_tokens(
28
+ RS.scan('a+#c'),
29
+ [
30
+ %i[literal literal],
31
+ %i[quantifier one_or_more],
32
+ %i[literal literal]
33
+ ]
34
+ )
35
+ end
36
+ end
@@ -1,20 +1,25 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Quantifier scanning') do
4
- include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
5
- include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
6
- include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
4
+ include_examples 'scan', 'a?', 1 => [:quantifier, :zero_or_one, '?', 1, 2]
5
+ include_examples 'scan', 'a??', 1 => [:quantifier, :zero_or_one_reluctant, '??', 1, 3]
6
+ include_examples 'scan', 'a?+', 1 => [:quantifier, :zero_or_one_possessive, '?+', 1, 3]
7
7
 
8
- include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
9
- include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
10
- include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
8
+ include_examples 'scan', 'a*', 1 => [:quantifier, :zero_or_more, '*', 1, 2]
9
+ include_examples 'scan', 'a*?', 1 => [:quantifier, :zero_or_more_reluctant, '*?', 1, 3]
10
+ include_examples 'scan', 'a*+', 1 => [:quantifier, :zero_or_more_possessive, '*+', 1, 3]
11
11
 
12
- include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
13
- include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
14
- include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
12
+ include_examples 'scan', 'a+', 1 => [:quantifier, :one_or_more, '+', 1, 2]
13
+ include_examples 'scan', 'a+?', 1 => [:quantifier, :one_or_more_reluctant, '+?', 1, 3]
14
+ include_examples 'scan', 'a++', 1 => [:quantifier, :one_or_more_possessive, '++', 1, 3]
15
15
 
16
- include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
17
- include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
18
- include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
19
- include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
16
+ include_examples 'scan', 'a{2}', 1 => [:quantifier, :interval, '{2}', 1, 4]
17
+ include_examples 'scan', 'a{2,}', 1 => [:quantifier, :interval, '{2,}', 1, 5]
18
+ include_examples 'scan', 'a{,2}', 1 => [:quantifier, :interval, '{,2}', 1, 5]
19
+ include_examples 'scan', 'a{2,4}', 1 => [:quantifier, :interval, '{2,4}', 1, 6]
20
+
21
+ # special case: chained quantifiers
22
+ include_examples 'scan', 'a+{2}{3}', 1 => [:quantifier, :one_or_more, '+', 1, 2]
23
+ include_examples 'scan', 'a+{2}{3}', 2 => [:quantifier, :interval, '{2}', 2, 5]
24
+ include_examples 'scan', 'a+{2}{3}', 3 => [:quantifier, :interval, '{3}', 5, 8]
20
25
  end
@@ -5,9 +5,19 @@ RSpec.describe('RefCall scanning') do
5
5
  include_examples 'scan', '(abc)\1' , 3 => [:backref, :number, '\1', 5, 7]
6
6
 
7
7
  # Group back-references, named, numbered, and relative
8
+ #
9
+ # NOTE: only \g supports forward-looking references using '+', e.g. \g<+1>
10
+ # refers to the next group, but \k<+1> refers to a group named '+1'.
11
+ # Inversely, only \k supports addition or subtraction of a recursion level.
12
+ # E.g. \k<x+0> refers to a group named 'x' at the current recursion level,
13
+ # but \g<x+0> refers to a group named 'x+0'.
14
+ #
8
15
  include_examples 'scan', '(?<X>abc)\k<X>', 3 => [:backref, :name_ref_ab, '\k<X>', 9, 14]
9
16
  include_examples 'scan', "(?<X>abc)\\k'X'", 3 => [:backref, :name_ref_sq, "\\k'X'", 9, 14]
10
17
 
18
+ include_examples 'scan', '(?<+1>abc)\k<+1>', 3 => [:backref, :name_ref_ab, '\k<+1>', 10, 16]
19
+ include_examples 'scan', "(?<+1>abc)\\k'+1'", 3 => [:backref, :name_ref_sq, "\\k'+1'", 10, 16]
20
+
11
21
  include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10]
12
22
  include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10]
13
23
 
@@ -18,9 +28,15 @@ RSpec.describe('RefCall scanning') do
18
28
  include_examples 'scan', '(?<X>abc)\g<X>', 3 => [:backref, :name_call_ab, '\g<X>', 9, 14]
19
29
  include_examples 'scan', "(?<X>abc)\\g'X'", 3 => [:backref, :name_call_sq, "\\g'X'", 9, 14]
20
30
 
31
+ include_examples 'scan', '(?<X>abc)\g<X-1>', 3 => [:backref, :name_call_ab, '\g<X-1>', 9, 16]
32
+ include_examples 'scan', "(?<X>abc)\\g'X-1'", 3 => [:backref, :name_call_sq, "\\g'X-1'", 9, 16]
33
+
21
34
  include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10]
22
35
  include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10]
23
36
 
37
+ include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9]
38
+ include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9]
39
+
24
40
  include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11]
25
41
  include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11]
26
42
 
@@ -33,4 +49,7 @@ RSpec.describe('RefCall scanning') do
33
49
 
34
50
  include_examples 'scan', '(abc)\k<1-0>', 3 => [:backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12]
35
51
  include_examples 'scan', "(abc)\\k'1-0'", 3 => [:backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12]
52
+
53
+ include_examples 'scan', '(abc)\k<+1-0>', 3 => [:backref, :name_recursion_ref_ab, '\k<+1-0>', 5, 13]
54
+ include_examples 'scan', "(abc)\\k'+1-0'", 3 => [:backref, :name_recursion_ref_sq, "\\k'+1-0'", 5, 13]
36
55
  end
@@ -6,8 +6,18 @@ RSpec.describe('Set scanning') do
6
6
  include_examples 'scan', /[^n]/, 1 => [:set, :negate, '^', 1, 2]
7
7
 
8
8
  include_examples 'scan', /[c]/, 1 => [:literal, :literal, 'c', 1, 2]
9
- include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
10
- include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
9
+ include_examples 'scan', /[^d]/, 2 => [:literal, :literal, 'd', 2, 3]
10
+
11
+ include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
12
+ include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
13
+
14
+ include_examples 'scan', /[\a]/, 1 => [:escape, :bell, '\a', 1, 3]
15
+ include_examples 'scan', /[\e]/, 1 => [:escape, :escape, '\e', 1, 3]
16
+ include_examples 'scan', /[\f]/, 1 => [:escape, :form_feed, '\f', 1, 3]
17
+ include_examples 'scan', /[\n]/, 1 => [:escape, :newline, '\n', 1, 3]
18
+ include_examples 'scan', /[\r]/, 1 => [:escape, :carriage, '\r', 1, 3]
19
+ include_examples 'scan', /[\t]/, 1 => [:escape, :tab, '\t', 1, 3]
20
+ include_examples 'scan', /[\v]/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
11
21
 
12
22
  include_examples 'scan', /[.]/, 1 => [:literal, :literal, '.', 1, 2]
13
23
  include_examples 'scan', /[?]/, 1 => [:literal, :literal, '?', 1, 2]
@@ -18,24 +28,36 @@ RSpec.describe('Set scanning') do
18
28
  include_examples 'scan', /[<]/, 1 => [:literal, :literal, '<', 1, 2]
19
29
  include_examples 'scan', /[>]/, 1 => [:literal, :literal, '>', 1, 2]
20
30
 
21
- include_examples 'scan', /[äöü]/, 2 => [:literal, :literal, 'ö', 3, 5]
22
-
23
- include_examples 'scan', /[\x20]/, 1 => [:escape, :hex, '\x20', 1, 5]
24
-
25
- include_examples 'scan', '[\.]', 1 => [:escape, :dot, '\.', 1, 3]
31
+ include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
32
+ include_examples 'scan', '[\u0040]', 1 => [:escape, :codepoint, '\u0040', 1, 7]
33
+ include_examples 'scan', '[\u{40}]', 1 => [:escape, :codepoint_list, '\u{40}', 1, 7]
34
+ include_examples 'scan', '[\c2]', 1 => [:escape, :control, '\c2', 1, 4]
35
+ include_examples 'scan', '[\C-C]', 1 => [:escape, :control, '\C-C', 1, 5]
36
+ include_examples 'scan', '[\x20]', 1 => [:escape, :hex, '\x20', 1, 5]
37
+ include_examples 'scan', '[\M-Z]', 1 => [:escape, :meta_sequence, '\M-Z', 1, 5]
38
+ include_examples 'scan', '[\M-\C-X]', 1 => [:escape, :meta_sequence, '\M-\C-X', 1, 8]
39
+ include_examples 'scan', '[\\[]', 1 => [:escape, :set_open, '\[', 1, 3]
40
+ include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
41
+ include_examples 'scan', '[a\-]', 2 => [:escape, :literal, '\-', 2, 4]
42
+ include_examples 'scan', '[\-c]', 1 => [:escape, :literal, '\-', 1, 3]
43
+ include_examples 'scan', '[\.]', 1 => [:escape, :literal, '\.', 1, 3]
44
+ include_examples 'scan', '[\?]', 1 => [:escape, :literal, '\?', 1, 3]
45
+ include_examples 'scan', '[\*]', 1 => [:escape, :literal, '\*', 1, 3]
46
+ include_examples 'scan', '[\+]', 1 => [:escape, :literal, '\+', 1, 3]
47
+ include_examples 'scan', '[\|]', 1 => [:escape, :literal, '\|', 1, 3]
48
+ include_examples 'scan', '[\{]', 1 => [:escape, :literal, '\{', 1, 3]
49
+ include_examples 'scan', '[\}]', 1 => [:escape, :literal, '\}', 1, 3]
50
+ include_examples 'scan', '[\(]', 1 => [:escape, :literal, '\(', 1, 3]
51
+ include_examples 'scan', '[\)]', 1 => [:escape, :literal, '\)', 1, 3]
26
52
  include_examples 'scan', '[\!]', 1 => [:escape, :literal, '\!', 1, 3]
27
53
  include_examples 'scan', '[\#]', 1 => [:escape, :literal, '\#', 1, 3]
28
- include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
29
- include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
30
54
  include_examples 'scan', '[\A]', 1 => [:escape, :literal, '\A', 1, 3]
31
55
  include_examples 'scan', '[\z]', 1 => [:escape, :literal, '\z', 1, 3]
32
56
  include_examples 'scan', '[\g]', 1 => [:escape, :literal, '\g', 1, 3]
33
57
  include_examples 'scan', '[\K]', 1 => [:escape, :literal, '\K', 1, 3]
34
58
  include_examples 'scan', '[\R]', 1 => [:escape, :literal, '\R', 1, 3]
35
59
  include_examples 'scan', '[\X]', 1 => [:escape, :literal, '\X', 1, 3]
36
- include_examples 'scan', '[\c2]', 1 => [:escape, :literal, '\c', 1, 3]
37
60
  include_examples 'scan', '[\B]', 1 => [:escape, :literal, '\B', 1, 3]
38
- include_examples 'scan', '[a\-c]', 2 => [:escape, :literal, '\-', 2, 4]
39
61
 
40
62
  include_examples 'scan', /[\d]/, 1 => [:type, :digit, '\d', 1, 3]
41
63
  include_examples 'scan', /[\da-z]/, 1 => [:type, :digit, '\d', 1, 3]
@@ -56,19 +78,23 @@ RSpec.describe('Set scanning') do
56
78
  include_examples 'scan', /[a-b-]/, 4 => [:literal, :literal, '-', 4, 5]
57
79
  include_examples 'scan', /[-a]/, 1 => [:literal, :literal, '-', 1, 2]
58
80
  include_examples 'scan', /[a-c^]/, 4 => [:literal, :literal, '^', 4, 5]
59
- include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
60
- include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
81
+ include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
82
+ include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
83
+ # this is a buggy range, it matches only `c`, but not `a`, `b` or `-`
84
+ include_examples 'scan', /[a-[c]]/, 2 => [:set, :range, '-', 2, 3]
85
+ # these are not ranges, they match `a`, `c` and `-` (or non-`-` if negated)
86
+ include_examples 'scan', /[[a]-[c]]/, 4 => [:literal, :literal, '-', 4, 5]
87
+ include_examples 'scan', /[[a]-c]/, 4 => [:literal, :literal, '-', 4, 5]
88
+ include_examples 'scan', /[^-c]/, 2 => [:literal, :literal, '-', 2, 3]
61
89
 
62
90
  include_examples 'scan', /[a[:digit:]c]/, 2 => [:posixclass, :digit, '[:digit:]', 2, 11]
63
91
  include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
64
92
  include_examples 'scan', /[[:^digit:]]/, 1 => [:nonposixclass, :digit, '[:^digit:]', 1, 11]
65
93
 
66
- include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
67
- include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
68
-
69
94
  include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
70
95
  include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
71
96
  include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
97
+ include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
72
98
 
73
99
  include_examples 'scan', /[a\p{digit}c]/, 2 => [:property, :digit, '\p{digit}', 2, 11]
74
100
  include_examples 'scan', /[a\P{digit}c]/, 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
@@ -90,6 +116,29 @@ RSpec.describe('Set scanning') do
90
116
  8 => [:set, :range, '-', 9, 10],
91
117
  10=> [:set, :close, ']', 11, 12]
92
118
 
119
+ # Collations/collating sequences and character equivalents are not enabled
120
+ # in Ruby at the moment. If they ever are, enable them in the scanner,
121
+ # add them to a new syntax version, and handle them in the parser. Until then,
122
+ # expect them to be scanned as regular subsets containing literals.
123
+ # include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
124
+ # include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
125
+ include_examples 'scan', '[a[.a-b.]c]',
126
+ 2 => [:set, :open, '[', 2, 3],
127
+ 3 => [:literal, :literal, '.', 3, 4],
128
+ 4 => [:literal, :literal, 'a', 4, 5]
129
+ include_examples 'scan', '[a[=e=]c]',
130
+ 2 => [:set, :open, '[', 2, 3],
131
+ 3 => [:literal, :literal, '=', 3, 4],
132
+ 4 => [:literal, :literal, 'e', 4, 5]
133
+
134
+ # multi-byte characters should not affect indices
135
+ include_examples 'scan', /[れます]/,
136
+ 0 => [:set, :open, '[', 0, 1],
137
+ 1 => [:literal, :literal, 'れ', 1, 2],
138
+ 2 => [:literal, :literal, 'ま', 2, 3],
139
+ 3 => [:literal, :literal, 'す', 3, 4],
140
+ 4 => [:set, :close, ']', 4, 5]
141
+
93
142
  specify('set literal encoding') do
94
143
  text = RS.scan('[a]')[1][2].to_s
95
144
  expect(text).to eq 'a'