regexp_parser 1.7.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +80 -1
  3. data/README.md +24 -12
  4. data/lib/regexp_parser/expression.rb +10 -19
  5. data/lib/regexp_parser/expression/classes/group.rb +17 -2
  6. data/lib/regexp_parser/expression/classes/root.rb +4 -16
  7. data/lib/regexp_parser/expression/quantifier.rb +9 -0
  8. data/lib/regexp_parser/expression/sequence.rb +0 -10
  9. data/lib/regexp_parser/lexer.rb +6 -6
  10. data/lib/regexp_parser/parser.rb +45 -12
  11. data/lib/regexp_parser/scanner.rb +1305 -1193
  12. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  13. data/lib/regexp_parser/scanner/property.rl +2 -2
  14. data/lib/regexp_parser/scanner/scanner.rl +194 -171
  15. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  16. data/lib/regexp_parser/version.rb +1 -1
  17. data/regexp_parser.gemspec +1 -1
  18. data/spec/expression/base_spec.rb +10 -0
  19. data/spec/expression/to_s_spec.rb +16 -0
  20. data/spec/lexer/delimiters_spec.rb +68 -0
  21. data/spec/lexer/literals_spec.rb +24 -49
  22. data/spec/parser/escapes_spec.rb +1 -1
  23. data/spec/parser/options_spec.rb +28 -0
  24. data/spec/parser/quantifiers_spec.rb +16 -0
  25. data/spec/parser/set/ranges_spec.rb +3 -3
  26. data/spec/scanner/delimiters_spec.rb +52 -0
  27. data/spec/scanner/errors_spec.rb +0 -1
  28. data/spec/scanner/escapes_spec.rb +10 -0
  29. data/spec/scanner/free_space_spec.rb +32 -0
  30. data/spec/scanner/literals_spec.rb +28 -38
  31. data/spec/scanner/options_spec.rb +36 -0
  32. data/spec/scanner/quantifiers_spec.rb +18 -13
  33. data/spec/scanner/sets_spec.rb +8 -2
  34. metadata +65 -61
  35. data/spec/expression/root_spec.rb +0 -9
  36. data/spec/expression/sequence_spec.rb +0 -9
@@ -74,9 +74,9 @@ module Regexp::Syntax
74
74
  end
75
75
 
76
76
  def warn_if_future_version(const_name)
77
- return if comparable_version(const_name) < comparable_version('3.0.0')
77
+ return if comparable_version(const_name) < comparable_version('4.0.0')
78
78
 
79
- warn('This library has only been tested up to Ruby 2.x, '\
79
+ warn('This library has only been tested up to Ruby 3.x, '\
80
80
  "but you are running with #{const_get(const_name).inspect}")
81
81
  end
82
82
  end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.7.0'
3
+ VERSION = '2.0.0'
4
4
  end
5
5
  end
@@ -32,5 +32,5 @@ Gem::Specification.new do |gem|
32
32
 
33
33
  gem.platform = Gem::Platform::RUBY
34
34
 
35
- gem.required_ruby_version = '>= 1.9.1'
35
+ gem.required_ruby_version = '>= 2.0.0'
36
36
  end
@@ -91,4 +91,14 @@ RSpec.describe(Regexp::Expression::Base) do
91
91
  expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
92
92
  expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
93
93
  end
94
+
95
+ specify('#base_length') do
96
+ expect(RP.parse(/(aa)/)[0].base_length).to eq 4
97
+ expect(RP.parse(/(aa){42}/)[0].base_length).to eq 4
98
+ end
99
+
100
+ specify('#full_length') do
101
+ expect(RP.parse(/(aa)/)[0].full_length).to eq 4
102
+ expect(RP.parse(/(aa){42}/)[0].full_length).to eq 8
103
+ end
94
104
  end
@@ -97,4 +97,20 @@ RSpec.describe('Expression#to_s') do
97
97
 
98
98
  expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
99
99
  end
100
+
101
+ # special case: implicit groups used for chained quantifiers produce no parens
102
+ specify 'chained quantifiers #to_s' do
103
+ pattern = /a+{1}{2}/
104
+ root = RP.parse(pattern)
105
+ expect(root.to_s).to eq 'a+{1}{2}'
106
+ end
107
+
108
+ # regression test for https://github.com/ammar/regexp_parser/issues/74
109
+ specify('non-ascii comment') do
110
+ pattern = '(?x) 😋 # 😋'
111
+ root = RP.parse(pattern)
112
+ expect(root.last).to be_a Regexp::Expression::Comment
113
+ expect(root.last.to_s).to eq '# 😋'
114
+ expect(root.to_s).to eq pattern
115
+ end
100
116
  end
@@ -0,0 +1,68 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter lexing') do
4
+ include_examples 'lex', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0]
6
+
7
+ include_examples 'lex', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2, 0, 0, 0]
9
+
10
+ include_examples 'lex', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1, 0, 0, 0]
12
+
13
+ include_examples 'lex', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2, 0, 0, 0]
15
+
16
+ include_examples 'lex', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2, 0, 0, 0]
18
+
19
+ include_examples 'lex', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2, 0, 0, 0]
21
+
22
+ include_examples 'lex', '}{+',
23
+ 0 => [:literal, :literal, '}', 0, 1, 0, 0, 0],
24
+ 1 => [:literal, :literal, '{', 1, 2, 0, 0, 0],
25
+ 2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
26
+
27
+ include_examples 'lex', '{{var}}',
28
+ 0 => [:literal, :literal, '{{var}}', 0, 7, 0, 0, 0]
29
+
30
+ include_examples 'lex', 'a{b}c',
31
+ 0 => [:literal, :literal, 'a{b}c', 0, 5, 0, 0, 0]
32
+
33
+ include_examples 'lex', 'a{1,2',
34
+ 0 => [:literal, :literal, 'a{1,2', 0, 5, 0, 0, 0]
35
+
36
+ include_examples 'lex', '({.+})',
37
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
38
+ 1 => [:literal, :literal, '{', 1, 2, 1, 0, 0],
39
+ 2 => [:meta, :dot, '.', 2, 3, 1, 0, 0],
40
+ 3 => [:quantifier, :one_or_more, '+', 3, 4, 1, 0, 0],
41
+ 4 => [:literal, :literal, '}', 4, 5, 1, 0, 0],
42
+ 5 => [:group, :close, ')', 5, 6, 0, 0, 0]
43
+
44
+ include_examples 'lex', ']',
45
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0]
46
+
47
+ include_examples 'lex', ']]',
48
+ 0 => [:literal, :literal, ']]', 0, 2, 0, 0, 0]
49
+
50
+ include_examples 'lex', ']\[',
51
+ 0 => [:literal, :literal, ']', 0, 1, 0, 0, 0],
52
+ 1 => [:escape, :set_open, '\[', 1, 3, 0, 0, 0]
53
+
54
+ include_examples 'lex', '()',
55
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
56
+ 1 => [:group, :close, ')', 1, 2, 0, 0, 0]
57
+
58
+ include_examples 'lex', '{abc:.+}}}[^}]]}',
59
+ 0 => [:literal, :literal, '{abc:', 0, 5, 0, 0, 0],
60
+ 1 => [:meta, :dot, '.', 5, 6, 0, 0, 0],
61
+ 2 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
62
+ 3 => [:literal, :literal, '}}}', 7, 10, 0, 0, 0],
63
+ 4 => [:set, :open, '[', 10, 11, 0, 0, 0],
64
+ 5 => [:set, :negate, '^', 11, 12, 0, 1, 0],
65
+ 6 => [:literal, :literal, '}', 12, 13, 0, 1, 0],
66
+ 7 => [:set, :close, ']', 13, 14, 0, 0, 0],
67
+ 8 => [:literal, :literal, ']}', 14, 16, 0, 0, 0]
68
+ end
@@ -10,67 +10,42 @@ RSpec.describe('Literal lexing') do
10
10
  1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
11
11
  2 => [:quantifier, :one_or_more, '+', 2, 3, 0, 0, 0]
12
12
 
13
- # 2 byte wide characters, Arabic
14
- include_examples 'lex', 'ا',
15
- 0 => [:literal, :literal, 'ا', 0, 2, 0, 0, 0]
16
-
17
- include_examples 'lex', 'aاbبcت',
18
- 0 => [:literal, :literal, 'aاbبcت', 0, 9, 0, 0, 0]
19
-
20
- include_examples 'lex', 'aاbبت?',
21
- 0 => [:literal, :literal, 'aاbب', 0, 6, 0, 0, 0],
22
- 1 => [:literal, :literal, 'ت', 6, 8, 0, 0, 0],
23
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0]
24
-
25
- include_examples 'lex', 'aا?bبcت+',
26
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
27
- 1 => [:literal, :literal, 'ا', 1, 3, 0, 0, 0],
28
- 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
29
- 3 => [:literal, :literal, 'bبc', 4, 8, 0, 0, 0],
30
- 4 => [:literal, :literal, 'ت', 8, 10, 0, 0, 0],
31
- 5 => [:quantifier, :one_or_more, '+', 10, 11, 0, 0, 0]
32
-
33
- include_examples 'lex', 'a(اbب+)cت?',
34
- 0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
35
- 1 => [:group, :capture, '(', 1, 2, 0, 0, 0],
36
- 2 => [:literal, :literal, 'اb', 2, 5, 1, 0, 0],
37
- 3 => [:literal, :literal, 'ب', 5, 7, 1, 0, 0],
38
- 4 => [:quantifier, :one_or_more, '+', 7, 8, 1, 0, 0],
39
- 5 => [:group, :close, ')', 8, 9, 0, 0, 0],
40
- 6 => [:literal, :literal, 'c', 9, 10, 0, 0, 0],
41
- 7 => [:literal, :literal, 'ت', 10, 12, 0, 0, 0],
42
- 8 => [:quantifier, :zero_or_one, '?', 12, 13, 0, 0, 0]
13
+ # 2 byte wide characters
14
+ include_examples 'lex', 'äöü+',
15
+ 0 => [:literal, :literal, 'äö', 0, 2, 0, 0, 0],
16
+ 1 => [:literal, :literal, 'ü', 2, 3, 0, 0, 0],
17
+ 2 => [:quantifier, :one_or_more, '+', 3, 4, 0, 0, 0]
43
18
 
44
19
  # 3 byte wide characters, Japanese
45
20
  include_examples 'lex', 'ab?れます+cd',
46
21
  0 => [:literal, :literal, 'a', 0, 1, 0, 0, 0],
47
22
  1 => [:literal, :literal, 'b', 1, 2, 0, 0, 0],
48
23
  2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
49
- 3 => [:literal, :literal, 'れま', 3, 9, 0, 0, 0],
50
- 4 => [:literal, :literal, 'す', 9, 12, 0, 0, 0],
51
- 5 => [:quantifier, :one_or_more, '+', 12, 13, 0, 0, 0],
52
- 6 => [:literal, :literal, 'cd', 13, 15, 0, 0, 0]
24
+ 3 => [:literal, :literal, 'れま', 3, 5, 0, 0, 0],
25
+ 4 => [:literal, :literal, 'す', 5, 6, 0, 0, 0],
26
+ 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
27
+ 6 => [:literal, :literal, 'cd', 7, 9, 0, 0, 0]
53
28
 
54
29
  # 4 byte wide characters, Osmanya
55
30
  include_examples 'lex', '𐒀𐒁?𐒂ab+𐒃',
56
- 0 => [:literal, :literal, '𐒀', 0, 4, 0, 0, 0],
57
- 1 => [:literal, :literal, '𐒁', 4, 8, 0, 0, 0],
58
- 2 => [:quantifier, :zero_or_one, '?', 8, 9, 0, 0, 0],
59
- 3 => [:literal, :literal, '𐒂a', 9, 14, 0, 0, 0],
60
- 4 => [:literal, :literal, 'b', 14, 15, 0, 0, 0],
61
- 5 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0],
62
- 6 => [:literal, :literal, '𐒃', 16, 20, 0, 0, 0]
31
+ 0 => [:literal, :literal, '𐒀', 0, 1, 0, 0, 0],
32
+ 1 => [:literal, :literal, '𐒁', 1, 2, 0, 0, 0],
33
+ 2 => [:quantifier, :zero_or_one, '?', 2, 3, 0, 0, 0],
34
+ 3 => [:literal, :literal, '𐒂a', 3, 5, 0, 0, 0],
35
+ 4 => [:literal, :literal, 'b', 5, 6, 0, 0, 0],
36
+ 5 => [:quantifier, :one_or_more, '+', 6, 7, 0, 0, 0],
37
+ 6 => [:literal, :literal, '𐒃', 7, 8, 0, 0, 0]
63
38
 
64
39
  include_examples 'lex', 'mu𝄞?si*𝄫c+',
65
40
  0 => [:literal, :literal, 'mu', 0, 2, 0, 0, 0],
66
- 1 => [:literal, :literal, '𝄞', 2, 6, 0, 0, 0],
67
- 2 => [:quantifier, :zero_or_one, '?', 6, 7, 0, 0, 0],
68
- 3 => [:literal, :literal, 's', 7, 8, 0, 0, 0],
69
- 4 => [:literal, :literal, 'i', 8, 9, 0, 0, 0],
70
- 5 => [:quantifier, :zero_or_more, '*', 9, 10, 0, 0, 0],
71
- 6 => [:literal, :literal, '𝄫', 10, 14, 0, 0, 0],
72
- 7 => [:literal, :literal, 'c', 14, 15, 0, 0, 0],
73
- 8 => [:quantifier, :one_or_more, '+', 15, 16, 0, 0, 0]
41
+ 1 => [:literal, :literal, '𝄞', 2, 3, 0, 0, 0],
42
+ 2 => [:quantifier, :zero_or_one, '?', 3, 4, 0, 0, 0],
43
+ 3 => [:literal, :literal, 's', 4, 5, 0, 0, 0],
44
+ 4 => [:literal, :literal, 'i', 5, 6, 0, 0, 0],
45
+ 5 => [:quantifier, :zero_or_more, '*', 6, 7, 0, 0, 0],
46
+ 6 => [:literal, :literal, '𝄫', 7, 8, 0, 0, 0],
47
+ 7 => [:literal, :literal, 'c', 8, 9, 0, 0, 0],
48
+ 8 => [:quantifier, :one_or_more, '+', 9, 10, 0, 0, 0]
74
49
 
75
50
  specify('lex single 2 byte char') do
76
51
  tokens = RL.lex("\u0627+")
@@ -25,7 +25,7 @@ RSpec.describe('EscapeSequence parsing') do
25
25
  include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
26
  include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
27
 
28
- # hex escapes
28
+ # hex escapes
29
29
  include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
30
 
31
31
  # octal escapes
@@ -0,0 +1,28 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('passing options to parse') do
4
+ it 'raises if if parsing from a Regexp and options are passed' do
5
+ expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
6
+ ArgumentError,
7
+ 'options cannot be supplied unless parsing a String'
8
+ )
9
+ end
10
+
11
+ it 'sets options if parsing from a String' do
12
+ root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)
13
+
14
+ expect(root.options).to eq(m: true, x: true)
15
+ end
16
+
17
+ it 'allows options to not be supplied when parsing from a Regexp' do
18
+ root = RP.parse(/a+/ix)
19
+
20
+ expect(root.options).to eq(i: true, x: true)
21
+ end
22
+
23
+ it 'has an empty option-hash when parsing from a String and passing no options' do
24
+ root = RP.parse('a+')
25
+
26
+ expect(root.options).to be_empty
27
+ end
28
+ end
@@ -35,6 +35,22 @@ RSpec.describe('Quantifier parsing') do
35
35
  include_examples 'quantifier', /a{4}b/, '{4}', :greedy, :interval, 4, 4
36
36
  include_examples 'quantifier', /a{4}?b/, '{4}?', :reluctant, :interval, 4, 4
37
37
  include_examples 'quantifier', /a{4}+b/, '{4}+', :possessive, :interval, 4, 4
38
+ include_examples 'quantifier', /a{004}+b/, '{004}+', :possessive, :interval, 4, 4
39
+
40
+ # special case: exps with chained quantifiers are wrapped in implicit passive groups
41
+ include_examples 'parse', /a+{2}{3}/,
42
+ 0 => [
43
+ :group, :passive, Group::Passive, implicit?: true, level: 0,
44
+ quantifier: Quantifier.new(:interval, '{3}', 3, 3, :greedy)
45
+ ],
46
+ [0, 0] => [
47
+ :group, :passive, Group::Passive, implicit?: true, level: 1,
48
+ quantifier: Quantifier.new(:interval, '{2}', 2, 2, :greedy)
49
+ ],
50
+ [0, 0, 0] => [
51
+ :literal, :literal, Literal, text: 'a', level: 2,
52
+ quantifier: Quantifier.new(:one_or_more, '+', 1, -1, :greedy)
53
+ ]
38
54
 
39
55
  specify('mode-checking methods') do
40
56
  exp = RP.parse(/a??/).first
@@ -17,7 +17,7 @@ RSpec.describe('CharacterSet::Range parsing') do
17
17
  end
18
18
 
19
19
  specify('parse set range hex') do
20
- root = RP.parse('[\\x00-\\x99]')
20
+ root = RP.parse('[\\x00-\\x22]')
21
21
  set = root[0]
22
22
  range = set[0]
23
23
 
@@ -26,9 +26,9 @@ RSpec.describe('CharacterSet::Range parsing') do
26
26
  expect(range.count).to eq 2
27
27
  expect(range.first.to_s).to eq '\\x00'
28
28
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
- expect(range.last.to_s).to eq '\\x99'
29
+ expect(range.last.to_s).to eq '\\x22'
30
30
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set).to match '\\x50'
31
+ expect(set).to match "\x11"
32
32
  end
33
33
 
34
34
  specify('parse set range unicode') do
@@ -0,0 +1,52 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Literal delimiter scanning') do
4
+ include_examples 'scan', '}',
5
+ 0 => [:literal, :literal, '}', 0, 1]
6
+
7
+ include_examples 'scan', '}}',
8
+ 0 => [:literal, :literal, '}}', 0, 2]
9
+
10
+ include_examples 'scan', '{',
11
+ 0 => [:literal, :literal, '{', 0, 1]
12
+
13
+ include_examples 'scan', '{{',
14
+ 0 => [:literal, :literal, '{{', 0, 2]
15
+
16
+ include_examples 'scan', '{}',
17
+ 0 => [:literal, :literal, '{}', 0, 2]
18
+
19
+ include_examples 'scan', '}{',
20
+ 0 => [:literal, :literal, '}{', 0, 2]
21
+
22
+ include_examples 'scan', '}{+',
23
+ 0 => [:literal, :literal, '}{', 0, 2]
24
+
25
+ include_examples 'scan', '{{var}}',
26
+ 0 => [:literal, :literal, '{{var}}', 0, 7]
27
+
28
+ include_examples 'scan', 'a{1,2',
29
+ 0 => [:literal, :literal, 'a{1,2', 0, 5]
30
+
31
+ include_examples 'scan', '({.+})',
32
+ 0 => [:group, :capture, '(', 0, 1],
33
+ 1 => [:literal, :literal, '{', 1, 2],
34
+ 2 => [:meta, :dot, '.', 2, 3],
35
+ 3 => [:quantifier, :one_or_more, '+', 3, 4],
36
+ 4 => [:literal, :literal, '}', 4, 5],
37
+ 5 => [:group, :close, ')', 5, 6]
38
+
39
+ include_examples 'scan', ']',
40
+ 0 => [:literal, :literal, ']', 0, 1]
41
+
42
+ include_examples 'scan', ']]',
43
+ 0 => [:literal, :literal, ']]', 0, 2]
44
+
45
+ include_examples 'scan', ']\[',
46
+ 0 => [:literal, :literal, ']', 0, 1],
47
+ 1 => [:escape, :set_open, '\[', 1, 3]
48
+
49
+ include_examples 'scan', '()',
50
+ 0 => [:group, :capture, '(', 0, 1],
51
+ 1 => [:group, :close, ')', 1, 2]
52
+ end
@@ -10,7 +10,6 @@ RSpec.describe(Regexp::Scanner) do
10
10
  include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
11
11
  include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
12
12
  include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
13
- include_examples 'scan error', RS::PrematureEndError, 'unbalanced interval', 'a{1,2'
14
13
  include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
15
14
  include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
16
15
  include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
@@ -11,7 +11,17 @@ RSpec.describe('Escape scanning') do
11
11
  include_examples 'scan', /c\tt/, 1 => [:escape, :tab, '\t', 1, 3]
12
12
  include_examples 'scan', /c\vt/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
13
13
 
14
+ # ineffectual literal escapes
15
+ # these cause "Unknown escape" warnings in Ruby for ascii chars,
16
+ # and simply drop the backslash for non-ascii chars (/\ü/.inspect == '/ü/').
17
+ # In terms of matching, Ruby treats them both like non-escaped literals.
14
18
  include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3]
19
+ include_examples 'scan', 'a\üc', 1 => [:escape, :literal, '\ü', 1, 3]
20
+ include_examples 'scan', 'a\😋c', 1 => [:escape, :literal, '\😋', 1, 3]
21
+
22
+ # these incomplete ref/call sequences are treated as literal escapes by Ruby
23
+ include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3]
24
+ include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3]
15
25
 
16
26
  include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5]
17
27
  include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
@@ -39,6 +39,17 @@ RSpec.describe('FreeSpace scanning') do
39
39
  11 => [:free_space, :comment, "# B ? comment\n", 37, 51],
40
40
  17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84],
41
41
  29 => [:free_space, :comment, "# D|E comment\n", 100, 114]
42
+
43
+ # single line / no trailing newline (c.f. issue #66)
44
+ include_examples 'scan', /a # b/x,
45
+ 0 => [:literal, :literal, 'a', 0, 1],
46
+ 1 => [:free_space, :whitespace, ' ', 1, 2],
47
+ 2 => [:free_space, :comment, "# b", 2, 5]
48
+
49
+ # without spaces (c.f. issue #66)
50
+ include_examples 'scan', /a#b/x,
51
+ 0 => [:literal, :literal, 'a', 0, 1],
52
+ 1 => [:free_space, :comment, "#b", 1, 3]
42
53
  end
43
54
 
44
55
  describe('scan free space inlined') do
@@ -130,4 +141,25 @@ RSpec.describe('FreeSpace scanning') do
130
141
  26 => [:literal, :literal, 'i j', 35, 38],
131
142
  27 => [:group, :close, ')', 38, 39]
132
143
  end
144
+
145
+ describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
146
+ include_examples 'scan', /a#bcd/,
147
+ 0 => [:literal, :literal, 'a#bcd', 0, 5]
148
+ include_examples 'scan', /a # bcd/,
149
+ 0 => [:literal, :literal, 'a # bcd', 0, 7]
150
+
151
+ include_examples 'scan', /a#\d/,
152
+ 0 => [:literal, :literal, 'a#', 0, 2],
153
+ 1 => [:type, :digit, '\d', 2, 4]
154
+ include_examples 'scan', /a # \d/,
155
+ 0 => [:literal, :literal, 'a # ', 0, 4],
156
+ 1 => [:type, :digit, '\d', 4, 6]
157
+
158
+ include_examples 'scan', /a#()/,
159
+ 0 => [:literal, :literal, 'a#', 0, 2],
160
+ 1 => [:group, :capture, '(', 2, 3]
161
+ include_examples 'scan', /a # ()/,
162
+ 0 => [:literal, :literal, 'a # ', 0, 4],
163
+ 1 => [:group, :capture, '(', 4, 5]
164
+ end
133
165
  end
@@ -2,48 +2,38 @@ require 'spec_helper'
2
2
 
3
3
  RSpec.describe('UTF8 scanning') do
4
4
  # ascii, single byte characters
5
- include_examples 'scan', 'a', 0 => [:literal, :literal, 'a', 0, 1]
5
+ include_examples 'scan', 'a',
6
+ 0 => [:literal, :literal, 'a', 0, 1]
6
7
 
7
- include_examples 'scan', 'ab+', 0 => [:literal, :literal, 'ab', 0, 2]
8
- include_examples 'scan', 'ab+', 1 => [:quantifier, :one_or_more, '+', 2, 3]
8
+ include_examples 'scan', 'ab+',
9
+ 0 => [:literal, :literal, 'ab', 0, 2],
10
+ 1 => [:quantifier, :one_or_more, '+', 2, 3]
9
11
 
10
- # 2 byte wide characters, Arabic
11
- include_examples 'scan', 'aاbبcت', 0 => [:literal, :literal, 'aاbبcت', 0, 9]
12
-
13
- include_examples 'scan', 'aاbبت?', 0 => [:literal, :literal, 'aاbبت', 0, 8]
14
- include_examples 'scan', 'aاbبت?', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
15
-
16
- include_examples 'scan', 'aا?bبcت+', 0 => [:literal, :literal, 'aا', 0, 3]
17
- include_examples 'scan', 'aا?bبcت+', 1 => [:quantifier, :zero_or_one, '?', 3, 4]
18
- include_examples 'scan', 'aا?bبcت+', 2 => [:literal, :literal, 'bبcت', 4, 10]
19
- include_examples 'scan', 'aا?bبcت+', 3 => [:quantifier, :one_or_more, '+', 10, 11]
20
-
21
- include_examples 'scan', 'a(اbب+)cت?', 0 => [:literal, :literal, 'a', 0, 1]
22
- include_examples 'scan', 'a(اbب+)cت?', 1 => [:group, :capture, '(', 1, 2]
23
- include_examples 'scan', 'a(اbب+)cت?', 2 => [:literal, :literal, 'اbب', 2, 7]
24
- include_examples 'scan', 'a(اbب+)cت?', 3 => [:quantifier, :one_or_more, '+', 7, 8]
25
- include_examples 'scan', 'a(اbب+)cت?', 4 => [:group, :close, ')', 8, 9]
26
- include_examples 'scan', 'a(اbب+)cت?', 5 => [:literal, :literal, 'cت', 9, 12]
27
- include_examples 'scan', 'a(اbب+)cت?', 6 => [:quantifier, :zero_or_one, '?', 12, 13]
12
+ # 2 byte wide characters
13
+ include_examples 'scan', 'äöü',
14
+ 0 => [:literal, :literal, 'äöü', 0, 3]
28
15
 
29
16
  # 3 byte wide characters, Japanese
30
- include_examples 'scan', 'ab?れます+cd', 0 => [:literal, :literal, 'ab', 0, 2]
31
- include_examples 'scan', 'ab?れます+cd', 1 => [:quantifier, :zero_or_one, '?', 2, 3]
32
- include_examples 'scan', 'ab?れます+cd', 2 => [:literal, :literal, 'れます', 3, 12]
33
- include_examples 'scan', 'ab?れます+cd', 3 => [:quantifier, :one_or_more, '+', 12, 13]
34
- include_examples 'scan', 'ab?れます+cd', 4 => [:literal, :literal, 'cd', 13, 15]
17
+ include_examples 'scan', 'ab?れます+cd',
18
+ 0 => [:literal, :literal, 'ab', 0, 2],
19
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
20
+ 2 => [:literal, :literal, 'れます', 3, 6],
21
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
22
+ 4 => [:literal, :literal, 'cd', 7, 9]
35
23
 
36
24
  # 4 byte wide characters, Osmanya
37
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 0 => [:literal, :literal, '𐒀𐒁', 0, 8]
38
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 1 => [:quantifier, :zero_or_one, '?', 8, 9]
39
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 2 => [:literal, :literal, '𐒂ab', 9, 15]
40
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 3 => [:quantifier, :one_or_more, '+', 15, 16]
41
- include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃', 4 => [:literal, :literal, '𐒃', 16, 20]
42
-
43
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 0 => [:literal, :literal, 'mu𝄞', 0, 6]
44
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 1 => [:quantifier, :zero_or_one, '?', 6, 7]
45
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 2 => [:literal, :literal, 'si', 7, 9]
46
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 3 => [:quantifier, :zero_or_more, '*', 9, 10]
47
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 4 => [:literal, :literal, '𝄫c', 10, 15]
48
- include_examples 'scan', 'mu𝄞?si*𝄫c+', 5 => [:quantifier, :one_or_more, '+', 15, 16]
25
+ include_examples 'scan', '𐒀𐒁?𐒂ab+𐒃',
26
+ 0 => [:literal, :literal, '𐒀𐒁', 0, 2],
27
+ 1 => [:quantifier, :zero_or_one, '?', 2, 3],
28
+ 2 => [:literal, :literal, '𐒂ab', 3, 6],
29
+ 3 => [:quantifier, :one_or_more, '+', 6, 7],
30
+ 4 => [:literal, :literal, '𐒃', 7, 8]
31
+
32
+ include_examples 'scan', 'mu𝄞?si*𝄫c+',
33
+ 0 => [:literal, :literal, 'mu𝄞', 0, 3],
34
+ 1 => [:quantifier, :zero_or_one, '?', 3, 4],
35
+ 2 => [:literal, :literal, 'si', 4, 6],
36
+ 3 => [:quantifier, :zero_or_more, '*', 6, 7],
37
+ 4 => [:literal, :literal, '𝄫c', 7, 9],
38
+ 5 => [:quantifier, :one_or_more, '+', 9, 10]
49
39
  end