regexp_parser 2.0.0 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -0
  3. data/Gemfile +6 -1
  4. data/README.md +1 -4
  5. data/Rakefile +8 -8
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/backref.rb +5 -0
  9. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  10. data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
  11. data/lib/regexp_parser/expression/classes/group.rb +12 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +1 -1
  13. data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
  14. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  15. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  16. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  17. data/lib/regexp_parser/expression/sequence.rb +3 -9
  18. data/lib/regexp_parser/expression/subexpression.rb +1 -1
  19. data/lib/regexp_parser/parser.rb +282 -334
  20. data/lib/regexp_parser/scanner.rb +1084 -1230
  21. data/lib/regexp_parser/scanner/scanner.rl +80 -110
  22. data/lib/regexp_parser/syntax.rb +8 -6
  23. data/lib/regexp_parser/syntax/any.rb +3 -3
  24. data/lib/regexp_parser/syntax/base.rb +1 -1
  25. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  26. data/lib/regexp_parser/version.rb +1 -1
  27. data/spec/expression/clone_spec.rb +36 -4
  28. data/spec/expression/free_space_spec.rb +2 -2
  29. data/spec/expression/methods/match_length_spec.rb +2 -2
  30. data/spec/expression/subexpression_spec.rb +1 -1
  31. data/spec/expression/to_s_spec.rb +28 -36
  32. data/spec/lexer/refcalls_spec.rb +5 -0
  33. data/spec/parser/all_spec.rb +2 -2
  34. data/spec/parser/errors_spec.rb +1 -1
  35. data/spec/parser/quantifiers_spec.rb +1 -0
  36. data/spec/parser/refcalls_spec.rb +5 -0
  37. data/spec/scanner/escapes_spec.rb +2 -1
  38. data/spec/scanner/groups_spec.rb +10 -1
  39. data/spec/scanner/refcalls_spec.rb +19 -0
  40. data/spec/scanner/sets_spec.rb +57 -14
  41. data/spec/spec_helper.rb +1 -0
  42. metadata +4 -3
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
10
10
  space = root[0]
11
11
 
12
12
  expect(space).to be_instance_of(FreeSpace::WhiteSpace)
13
- expect { space.quantify(:dummy, '#') }.to raise_error(RuntimeError)
13
+ expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
14
14
  end
15
15
 
16
16
  specify('comment quantify raises error') do
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
22
22
  comment = root[3]
23
23
 
24
24
  expect(comment).to be_instance_of(FreeSpace::Comment)
25
- expect { comment.quantify(:dummy, '#') }.to raise_error(RuntimeError)
25
+ expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
26
26
  end
27
27
  end
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe(Regexp::MatchLength) do
4
- ML = described_class
3
+ ML = Regexp::MatchLength
5
4
 
5
+ RSpec.describe(Regexp::MatchLength) do
6
6
  specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
7
7
  specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
8
8
  specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
32
32
  }
33
33
 
34
34
  root.each_expression do |exp|
35
- next unless expected_nesting_level = tests.delete(exp.to_s)
35
+ next unless (expected_nesting_level = tests.delete(exp.to_s))
36
36
  expect(expected_nesting_level).to eq exp.nesting_level
37
37
  end
38
38
 
@@ -1,58 +1,50 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Expression#to_s') do
4
- specify('literal alternation') do
5
- pattern = 'abcd|ghij|klmn|pqur'
4
+ def parse_frozen(pattern, ruby_version = nil)
5
+ IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
6
+ end
6
7
 
7
- expect(RP.parse(pattern).to_s).to eq pattern
8
+ def expect_round_trip(pattern, ruby_version = nil)
9
+ parsed = parse_frozen(pattern, ruby_version)
10
+
11
+ expect(parsed.to_s).to eql(pattern)
8
12
  end
9
13
 
10
- specify('quantified alternations') do
11
- pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
14
+ specify('literal alternation') do
15
+ expect_round_trip('abcd|ghij|klmn|pqur')
16
+ end
12
17
 
13
- expect(RP.parse(pattern).to_s).to eq pattern
18
+ specify('quantified alternations') do
19
+ expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
14
20
  end
15
21
 
16
22
  specify('quantified sets') do
17
- pattern = '[abc]+|[^def]{3,6}'
18
-
19
- expect(RP.parse(pattern).to_s).to eq pattern
23
+ expect_round_trip('[abc]+|[^def]{3,6}')
20
24
  end
21
25
 
22
26
  specify('property sets') do
23
- pattern = '[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+'
24
-
25
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
27
+ expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
26
28
  end
27
29
 
28
30
  specify('groups') do
29
- pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
30
-
31
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
31
+ expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
32
32
  end
33
33
 
34
34
  specify('assertions') do
35
- pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
36
-
37
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
35
+ expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
38
36
  end
39
37
 
40
38
  specify('comments') do
41
- pattern = '(?#start)a(?#middle)b(?#end)'
42
-
43
- expect(RP.parse(pattern).to_s).to eq pattern
39
+ expect_round_trip('(?#start)a(?#middle)b(?#end)')
44
40
  end
45
41
 
46
42
  specify('options') do
47
- pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
48
-
49
- expect(RP.parse(pattern).to_s).to eq pattern
43
+ expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
50
44
  end
51
45
 
52
46
  specify('url') do
53
- pattern = ('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
54
-
55
- expect(RP.parse(pattern).to_s).to eq pattern
47
+ expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
56
48
  end
57
49
 
58
50
  specify('multiline source') do
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
64
56
  \z
65
57
  /x
66
58
 
67
- expect(RP.parse(multiline).to_s).to eq multiline.source
59
+ expect(parse_frozen(multiline).to_s).to eql(multiline.source)
68
60
  end
69
61
 
70
62
  specify('multiline #to_s') do
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
76
68
  \z
77
69
  /x
78
70
 
79
- expect(RP.parse(multiline.to_s).to_s).to eq multiline.to_s
71
+ expect_round_trip(multiline.to_s)
80
72
  end
81
73
 
82
74
  # Free spacing expressions that use spaces between quantifiers and their
@@ -93,24 +85,24 @@ RSpec.describe('Expression#to_s') do
93
85
  /x
94
86
 
95
87
  str = 'bbbcged'
96
- root = RP.parse(multiline)
88
+ root = parse_frozen(multiline)
97
89
 
98
- expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
90
+ expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
99
91
  end
100
92
 
101
93
  # special case: implicit groups used for chained quantifiers produce no parens
102
94
  specify 'chained quantifiers #to_s' do
103
95
  pattern = /a+{1}{2}/
104
- root = RP.parse(pattern)
105
- expect(root.to_s).to eq 'a+{1}{2}'
96
+ root = parse_frozen(pattern)
97
+ expect(root.to_s).to eql('a+{1}{2}')
106
98
  end
107
99
 
108
100
  # regression test for https://github.com/ammar/regexp_parser/issues/74
109
101
  specify('non-ascii comment') do
110
102
  pattern = '(?x) 😋 # 😋'
111
103
  root = RP.parse(pattern)
112
- expect(root.last).to be_a Regexp::Expression::Comment
113
- expect(root.last.to_s).to eq '# 😋'
114
- expect(root.to_s).to eq pattern
104
+ expect(root.last).to be_a(Regexp::Expression::Comment)
105
+ expect(root.last.to_s).to eql('# 😋')
106
+ expect(root.to_s).to eql(pattern)
115
107
  end
116
108
  end
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
32
32
  include_examples 'lex', "(abc)\\g'1'",
33
33
  3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
34
 
35
+ include_examples 'lex', '\g<0>',
36
+ 0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
37
+ include_examples 'lex', "\\g'0'",
38
+ 0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
39
+
35
40
  include_examples 'lex', '(abc)\g<-1>',
36
41
  3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
42
  include_examples 'lex', "(abc)\\g'-1'",
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
34
34
  end
35
35
 
36
36
  specify('parse no quantifier target raises error') do
37
- expect { RP.parse('?abc') }.to raise_error(ArgumentError)
37
+ expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
38
38
  end
39
39
 
40
40
  specify('parse sequence no quantifier target raises error') do
41
- expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
41
+ expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
42
42
  end
43
43
  end
@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
9
9
  .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
10
  end
11
11
 
12
- RSpec.shared_examples 'UnknownTokenError' do |type, token|
12
+ RSpec.shared_examples 'UnknownTokenError' do |type|
13
13
  it "raises for unkown tokens of type #{type}" do
14
14
  expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
15
  .to raise_error(Regexp::Parser::UnknownTokenError)
@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
11
11
  expect(exp.quantifier.min).to eq min
12
12
  expect(exp.quantifier.max).to eq max
13
13
  expect(exp.quantifier.mode).to eq mode
14
+ expect(exp.quantifier.text).to eq text
14
15
  end
15
16
  end
16
17
 
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
29
29
  include_examples 'parse', /(abc)\g'1'/,
30
30
  1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
31
31
 
32
+ include_examples 'parse', '\g<0>',
33
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
34
+ include_examples 'parse', "\\g'0'",
35
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
36
+
32
37
  include_examples 'parse', /(abc)\g<-1>/,
33
38
  1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
34
39
  include_examples 'parse', /(abc)\g'-1'/,
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
4
4
  include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
5
 
6
6
  # not an escape outside a character set
7
- include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
8
 
9
9
  include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
10
  include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
@@ -27,6 +27,7 @@ RSpec.describe('Escape scanning') do
27
27
  include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
28
28
  include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
29
29
 
30
+ include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
30
31
  include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
31
32
  include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
32
33
 
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
5
5
  include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
6
  include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
7
 
8
+ # Named groups
9
+ # only names that start with a hyphen or digit (ascii or other) are invalid
8
10
  include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
11
  include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
-
11
12
  include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
13
  include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
14
+ include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
15
+ include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
16
+ include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
17
+ include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
18
+ include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
19
+ include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
20
+ include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
21
+ include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
13
22
 
14
23
  include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
24
  include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
@@ -5,9 +5,19 @@ RSpec.describe('RefCall scanning') do
5
5
  include_examples 'scan', '(abc)\1' , 3 => [:backref, :number, '\1', 5, 7]
6
6
 
7
7
  # Group back-references, named, numbered, and relative
8
+ #
9
+ # NOTE: only \g supports forward-looking references using '+', e.g. \g<+1>
10
+ # refers to the next group, but \k<+1> refers to a group named '+1'.
11
+ # Inversely, only \k supports addition or subtraction of a recursion level.
12
+ # E.g. \k<x+0> refers to a group named 'x' at the current recursion level,
13
+ # but \g<x+0> refers to a group named 'x+0'.
14
+ #
8
15
  include_examples 'scan', '(?<X>abc)\k<X>', 3 => [:backref, :name_ref_ab, '\k<X>', 9, 14]
9
16
  include_examples 'scan', "(?<X>abc)\\k'X'", 3 => [:backref, :name_ref_sq, "\\k'X'", 9, 14]
10
17
 
18
+ include_examples 'scan', '(?<+1>abc)\k<+1>', 3 => [:backref, :name_ref_ab, '\k<+1>', 10, 16]
19
+ include_examples 'scan', "(?<+1>abc)\\k'+1'", 3 => [:backref, :name_ref_sq, "\\k'+1'", 10, 16]
20
+
11
21
  include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10]
12
22
  include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10]
13
23
 
@@ -18,9 +28,15 @@ RSpec.describe('RefCall scanning') do
18
28
  include_examples 'scan', '(?<X>abc)\g<X>', 3 => [:backref, :name_call_ab, '\g<X>', 9, 14]
19
29
  include_examples 'scan', "(?<X>abc)\\g'X'", 3 => [:backref, :name_call_sq, "\\g'X'", 9, 14]
20
30
 
31
+ include_examples 'scan', '(?<X>abc)\g<X-1>', 3 => [:backref, :name_call_ab, '\g<X-1>', 9, 16]
32
+ include_examples 'scan', "(?<X>abc)\\g'X-1'", 3 => [:backref, :name_call_sq, "\\g'X-1'", 9, 16]
33
+
21
34
  include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10]
22
35
  include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10]
23
36
 
37
+ include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9]
38
+ include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9]
39
+
24
40
  include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11]
25
41
  include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11]
26
42
 
@@ -33,4 +49,7 @@ RSpec.describe('RefCall scanning') do
33
49
 
34
50
  include_examples 'scan', '(abc)\k<1-0>', 3 => [:backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12]
35
51
  include_examples 'scan', "(abc)\\k'1-0'", 3 => [:backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12]
52
+
53
+ include_examples 'scan', '(abc)\k<+1-0>', 3 => [:backref, :name_recursion_ref_ab, '\k<+1-0>', 5, 13]
54
+ include_examples 'scan', "(abc)\\k'+1-0'", 3 => [:backref, :name_recursion_ref_sq, "\\k'+1-0'", 5, 13]
36
55
  end
@@ -6,8 +6,18 @@ RSpec.describe('Set scanning') do
6
6
  include_examples 'scan', /[^n]/, 1 => [:set, :negate, '^', 1, 2]
7
7
 
8
8
  include_examples 'scan', /[c]/, 1 => [:literal, :literal, 'c', 1, 2]
9
- include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
10
- include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
9
+ include_examples 'scan', /[^d]/, 2 => [:literal, :literal, 'd', 2, 3]
10
+
11
+ include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
12
+ include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
13
+
14
+ include_examples 'scan', /[\a]/, 1 => [:escape, :bell, '\a', 1, 3]
15
+ include_examples 'scan', /[\e]/, 1 => [:escape, :escape, '\e', 1, 3]
16
+ include_examples 'scan', /[\f]/, 1 => [:escape, :form_feed, '\f', 1, 3]
17
+ include_examples 'scan', /[\n]/, 1 => [:escape, :newline, '\n', 1, 3]
18
+ include_examples 'scan', /[\r]/, 1 => [:escape, :carriage, '\r', 1, 3]
19
+ include_examples 'scan', /[\t]/, 1 => [:escape, :tab, '\t', 1, 3]
20
+ include_examples 'scan', /[\v]/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
11
21
 
12
22
  include_examples 'scan', /[.]/, 1 => [:literal, :literal, '.', 1, 2]
13
23
  include_examples 'scan', /[?]/, 1 => [:literal, :literal, '?', 1, 2]
@@ -18,22 +28,36 @@ RSpec.describe('Set scanning') do
18
28
  include_examples 'scan', /[<]/, 1 => [:literal, :literal, '<', 1, 2]
19
29
  include_examples 'scan', /[>]/, 1 => [:literal, :literal, '>', 1, 2]
20
30
 
21
- include_examples 'scan', /[\x20]/, 1 => [:escape, :hex, '\x20', 1, 5]
22
-
23
- include_examples 'scan', '[\.]', 1 => [:escape, :dot, '\.', 1, 3]
31
+ include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
32
+ include_examples 'scan', '[\u0040]', 1 => [:escape, :codepoint, '\u0040', 1, 7]
33
+ include_examples 'scan', '[\u{40}]', 1 => [:escape, :codepoint_list, '\u{40}', 1, 7]
34
+ include_examples 'scan', '[\c2]', 1 => [:escape, :control, '\c2', 1, 4]
35
+ include_examples 'scan', '[\C-C]', 1 => [:escape, :control, '\C-C', 1, 5]
36
+ include_examples 'scan', '[\x20]', 1 => [:escape, :hex, '\x20', 1, 5]
37
+ include_examples 'scan', '[\M-Z]', 1 => [:escape, :meta_sequence, '\M-Z', 1, 5]
38
+ include_examples 'scan', '[\M-\C-X]', 1 => [:escape, :meta_sequence, '\M-\C-X', 1, 8]
39
+ include_examples 'scan', '[\\[]', 1 => [:escape, :set_open, '\[', 1, 3]
40
+ include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
41
+ include_examples 'scan', '[a\-]', 2 => [:escape, :literal, '\-', 2, 4]
42
+ include_examples 'scan', '[\-c]', 1 => [:escape, :literal, '\-', 1, 3]
43
+ include_examples 'scan', '[\.]', 1 => [:escape, :literal, '\.', 1, 3]
44
+ include_examples 'scan', '[\?]', 1 => [:escape, :literal, '\?', 1, 3]
45
+ include_examples 'scan', '[\*]', 1 => [:escape, :literal, '\*', 1, 3]
46
+ include_examples 'scan', '[\+]', 1 => [:escape, :literal, '\+', 1, 3]
47
+ include_examples 'scan', '[\|]', 1 => [:escape, :literal, '\|', 1, 3]
48
+ include_examples 'scan', '[\{]', 1 => [:escape, :literal, '\{', 1, 3]
49
+ include_examples 'scan', '[\}]', 1 => [:escape, :literal, '\}', 1, 3]
50
+ include_examples 'scan', '[\(]', 1 => [:escape, :literal, '\(', 1, 3]
51
+ include_examples 'scan', '[\)]', 1 => [:escape, :literal, '\)', 1, 3]
24
52
  include_examples 'scan', '[\!]', 1 => [:escape, :literal, '\!', 1, 3]
25
53
  include_examples 'scan', '[\#]', 1 => [:escape, :literal, '\#', 1, 3]
26
- include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
27
- include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
28
54
  include_examples 'scan', '[\A]', 1 => [:escape, :literal, '\A', 1, 3]
29
55
  include_examples 'scan', '[\z]', 1 => [:escape, :literal, '\z', 1, 3]
30
56
  include_examples 'scan', '[\g]', 1 => [:escape, :literal, '\g', 1, 3]
31
57
  include_examples 'scan', '[\K]', 1 => [:escape, :literal, '\K', 1, 3]
32
58
  include_examples 'scan', '[\R]', 1 => [:escape, :literal, '\R', 1, 3]
33
59
  include_examples 'scan', '[\X]', 1 => [:escape, :literal, '\X', 1, 3]
34
- include_examples 'scan', '[\c2]', 1 => [:escape, :literal, '\c', 1, 3]
35
60
  include_examples 'scan', '[\B]', 1 => [:escape, :literal, '\B', 1, 3]
36
- include_examples 'scan', '[a\-c]', 2 => [:escape, :literal, '\-', 2, 4]
37
61
 
38
62
  include_examples 'scan', /[\d]/, 1 => [:type, :digit, '\d', 1, 3]
39
63
  include_examples 'scan', /[\da-z]/, 1 => [:type, :digit, '\d', 1, 3]
@@ -54,19 +78,23 @@ RSpec.describe('Set scanning') do
54
78
  include_examples 'scan', /[a-b-]/, 4 => [:literal, :literal, '-', 4, 5]
55
79
  include_examples 'scan', /[-a]/, 1 => [:literal, :literal, '-', 1, 2]
56
80
  include_examples 'scan', /[a-c^]/, 4 => [:literal, :literal, '^', 4, 5]
57
- include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
58
- include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
81
+ include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
82
+ include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
83
+ # this is a buggy range, it matches only `c`, but not `a`, `b` or `-`
84
+ include_examples 'scan', /[a-[c]]/, 2 => [:set, :range, '-', 2, 3]
85
+ # these are not ranges, they match `a`, `c` and `-` (or non-`-` if negated)
86
+ include_examples 'scan', /[[a]-[c]]/, 4 => [:literal, :literal, '-', 4, 5]
87
+ include_examples 'scan', /[[a]-c]/, 4 => [:literal, :literal, '-', 4, 5]
88
+ include_examples 'scan', /[^-c]/, 2 => [:literal, :literal, '-', 2, 3]
59
89
 
60
90
  include_examples 'scan', /[a[:digit:]c]/, 2 => [:posixclass, :digit, '[:digit:]', 2, 11]
61
91
  include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
62
92
  include_examples 'scan', /[[:^digit:]]/, 1 => [:nonposixclass, :digit, '[:^digit:]', 1, 11]
63
93
 
64
- include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
65
- include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
66
-
67
94
  include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
68
95
  include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
69
96
  include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
97
+ include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
70
98
 
71
99
  include_examples 'scan', /[a\p{digit}c]/, 2 => [:property, :digit, '\p{digit}', 2, 11]
72
100
  include_examples 'scan', /[a\P{digit}c]/, 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
@@ -88,6 +116,21 @@ RSpec.describe('Set scanning') do
88
116
  8 => [:set, :range, '-', 9, 10],
89
117
  10=> [:set, :close, ']', 11, 12]
90
118
 
119
+ # Collations/collating sequences and character equivalents are not enabled
120
+ # in Ruby at the moment. If they ever are, enable them in the scanner,
121
+ # add them to a new syntax version, and handle them in the parser. Until then,
122
+ # expect them to be scanned as regular subsets containing literals.
123
+ # include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
124
+ # include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
125
+ include_examples 'scan', '[a[.a-b.]c]',
126
+ 2 => [:set, :open, '[', 2, 3],
127
+ 3 => [:literal, :literal, '.', 3, 4],
128
+ 4 => [:literal, :literal, 'a', 4, 5]
129
+ include_examples 'scan', '[a[=e=]c]',
130
+ 2 => [:set, :open, '[', 2, 3],
131
+ 3 => [:literal, :literal, '=', 3, 4],
132
+ 4 => [:literal, :literal, 'e', 4, 5]
133
+
91
134
  # multi-byte characters should not affect indices
92
135
  include_examples 'scan', /[れます]/,
93
136
  0 => [:set, :open, '[', 0, 1],
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'ice_nine'
1
2
  require 'regexp_parser'
2
3
  require 'regexp_property_values'
3
4
  require_relative 'support/shared_examples'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-25 00:00:00.000000000 Z
11
+ date: 2021-02-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
14
  email:
@@ -23,6 +23,7 @@ files:
23
23
  - README.md
24
24
  - Rakefile
25
25
  - lib/regexp_parser.rb
26
+ - lib/regexp_parser/error.rb
26
27
  - lib/regexp_parser/expression.rb
27
28
  - lib/regexp_parser/expression/classes/alternation.rb
28
29
  - lib/regexp_parser/expression/classes/anchor.rb
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
184
  - !ruby/object:Gem::Version
184
185
  version: '0'
185
186
  requirements: []
186
- rubygems_version: 3.2.0.rc.1
187
+ rubygems_version: 3.2.3
187
188
  signing_key:
188
189
  specification_version: 4
189
190
  summary: Scanner, lexer, parser for ruby's regular expressions