regexp_parser 2.0.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -0
  3. data/Gemfile +6 -1
  4. data/README.md +1 -4
  5. data/Rakefile +8 -8
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/backref.rb +5 -0
  9. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  10. data/lib/regexp_parser/expression/classes/free_space.rb +2 -2
  11. data/lib/regexp_parser/expression/classes/group.rb +12 -2
  12. data/lib/regexp_parser/expression/classes/property.rb +1 -1
  13. data/lib/regexp_parser/expression/classes/set/range.rb +2 -1
  14. data/lib/regexp_parser/expression/methods/match_length.rb +2 -2
  15. data/lib/regexp_parser/expression/methods/traverse.rb +2 -2
  16. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  17. data/lib/regexp_parser/expression/sequence.rb +3 -9
  18. data/lib/regexp_parser/expression/subexpression.rb +1 -1
  19. data/lib/regexp_parser/parser.rb +282 -334
  20. data/lib/regexp_parser/scanner.rb +1084 -1230
  21. data/lib/regexp_parser/scanner/scanner.rl +80 -110
  22. data/lib/regexp_parser/syntax.rb +8 -6
  23. data/lib/regexp_parser/syntax/any.rb +3 -3
  24. data/lib/regexp_parser/syntax/base.rb +1 -1
  25. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  26. data/lib/regexp_parser/version.rb +1 -1
  27. data/spec/expression/clone_spec.rb +36 -4
  28. data/spec/expression/free_space_spec.rb +2 -2
  29. data/spec/expression/methods/match_length_spec.rb +2 -2
  30. data/spec/expression/subexpression_spec.rb +1 -1
  31. data/spec/expression/to_s_spec.rb +28 -36
  32. data/spec/lexer/refcalls_spec.rb +5 -0
  33. data/spec/parser/all_spec.rb +2 -2
  34. data/spec/parser/errors_spec.rb +1 -1
  35. data/spec/parser/quantifiers_spec.rb +1 -0
  36. data/spec/parser/refcalls_spec.rb +5 -0
  37. data/spec/scanner/escapes_spec.rb +2 -1
  38. data/spec/scanner/groups_spec.rb +10 -1
  39. data/spec/scanner/refcalls_spec.rb +19 -0
  40. data/spec/scanner/sets_spec.rb +57 -14
  41. data/spec/spec_helper.rb +1 -0
  42. metadata +4 -3
@@ -10,7 +10,7 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
10
10
  space = root[0]
11
11
 
12
12
  expect(space).to be_instance_of(FreeSpace::WhiteSpace)
13
- expect { space.quantify(:dummy, '#') }.to raise_error(RuntimeError)
13
+ expect { space.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
14
14
  end
15
15
 
16
16
  specify('comment quantify raises error') do
@@ -22,6 +22,6 @@ RSpec.describe(Regexp::Expression::FreeSpace) do
22
22
  comment = root[3]
23
23
 
24
24
  expect(comment).to be_instance_of(FreeSpace::Comment)
25
- expect { comment.quantify(:dummy, '#') }.to raise_error(RuntimeError)
25
+ expect { comment.quantify(:dummy, '#') }.to raise_error(Regexp::Parser::Error)
26
26
  end
27
27
  end
@@ -1,8 +1,8 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe(Regexp::MatchLength) do
4
- ML = described_class
3
+ ML = Regexp::MatchLength
5
4
 
5
+ RSpec.describe(Regexp::MatchLength) do
6
6
  specify('literal') { expect(ML.of(/a/).minmax).to eq [1, 1] }
7
7
  specify('literal sequence') { expect(ML.of(/abc/).minmax).to eq [3, 3] }
8
8
  specify('dot') { expect(ML.of(/./).minmax).to eq [1, 1] }
@@ -32,7 +32,7 @@ RSpec.describe(Regexp::Expression::Subexpression) do
32
32
  }
33
33
 
34
34
  root.each_expression do |exp|
35
- next unless expected_nesting_level = tests.delete(exp.to_s)
35
+ next unless (expected_nesting_level = tests.delete(exp.to_s))
36
36
  expect(expected_nesting_level).to eq exp.nesting_level
37
37
  end
38
38
 
@@ -1,58 +1,50 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Expression#to_s') do
4
- specify('literal alternation') do
5
- pattern = 'abcd|ghij|klmn|pqur'
4
+ def parse_frozen(pattern, ruby_version = nil)
5
+ IceNine.deep_freeze(RP.parse(pattern, *ruby_version))
6
+ end
6
7
 
7
- expect(RP.parse(pattern).to_s).to eq pattern
8
+ def expect_round_trip(pattern, ruby_version = nil)
9
+ parsed = parse_frozen(pattern, ruby_version)
10
+
11
+ expect(parsed.to_s).to eql(pattern)
8
12
  end
9
13
 
10
- specify('quantified alternations') do
11
- pattern = '(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)'
14
+ specify('literal alternation') do
15
+ expect_round_trip('abcd|ghij|klmn|pqur')
16
+ end
12
17
 
13
- expect(RP.parse(pattern).to_s).to eq pattern
18
+ specify('quantified alternations') do
19
+ expect_round_trip('(?:a?[b]+(c){2}|d+[e]*(f)?)|(?:g+[h]?(i){2,3}|j*[k]{3,5}(l)?)')
14
20
  end
15
21
 
16
22
  specify('quantified sets') do
17
- pattern = '[abc]+|[^def]{3,6}'
18
-
19
- expect(RP.parse(pattern).to_s).to eq pattern
23
+ expect_round_trip('[abc]+|[^def]{3,6}')
20
24
  end
21
25
 
22
26
  specify('property sets') do
23
- pattern = '[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+'
24
-
25
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
27
+ expect_round_trip('[\\a\\b\\p{Lu}\\P{Z}\\c\\d]+', 'ruby/1.9')
26
28
  end
27
29
 
28
30
  specify('groups') do
29
- pattern = "(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++"
30
-
31
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
31
+ expect_round_trip("(a(?>b(?:c(?<n>d(?'N'e)??f)+g)*+h)*i)++", 'ruby/1.9')
32
32
  end
33
33
 
34
34
  specify('assertions') do
35
- pattern = '(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?'
36
-
37
- expect(RP.parse(pattern, 'ruby/1.9').to_s).to eq pattern
35
+ expect_round_trip('(a+(?=b+(?!c+(?<=d+(?<!e+)?f+)?g+)?h+)?i+)?', 'ruby/1.9')
38
36
  end
39
37
 
40
38
  specify('comments') do
41
- pattern = '(?#start)a(?#middle)b(?#end)'
42
-
43
- expect(RP.parse(pattern).to_s).to eq pattern
39
+ expect_round_trip('(?#start)a(?#middle)b(?#end)')
44
40
  end
45
41
 
46
42
  specify('options') do
47
- pattern = '(?mix:start)a(?-mix:middle)b(?i-mx:end)'
48
-
49
- expect(RP.parse(pattern).to_s).to eq pattern
43
+ expect_round_trip('(?mix:start)a(?-mix:middle)b(?i-mx:end)')
50
44
  end
51
45
 
52
46
  specify('url') do
53
- pattern = ('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
54
-
55
- expect(RP.parse(pattern).to_s).to eq pattern
47
+ expect_round_trip('(^$)|(^(http|https):\\/\\/[a-z0-9]+([\\-\\.]{1}[a-z0-9]+)*' + '\\.[a-z]{2,5}(([0-9]{1,5})?\\/.*)?$)')
56
48
  end
57
49
 
58
50
  specify('multiline source') do
@@ -64,7 +56,7 @@ RSpec.describe('Expression#to_s') do
64
56
  \z
65
57
  /x
66
58
 
67
- expect(RP.parse(multiline).to_s).to eq multiline.source
59
+ expect(parse_frozen(multiline).to_s).to eql(multiline.source)
68
60
  end
69
61
 
70
62
  specify('multiline #to_s') do
@@ -76,7 +68,7 @@ RSpec.describe('Expression#to_s') do
76
68
  \z
77
69
  /x
78
70
 
79
- expect(RP.parse(multiline.to_s).to_s).to eq multiline.to_s
71
+ expect_round_trip(multiline.to_s)
80
72
  end
81
73
 
82
74
  # Free spacing expressions that use spaces between quantifiers and their
@@ -93,24 +85,24 @@ RSpec.describe('Expression#to_s') do
93
85
  /x
94
86
 
95
87
  str = 'bbbcged'
96
- root = RP.parse(multiline)
88
+ root = parse_frozen(multiline)
97
89
 
98
- expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eq multiline.match(str)[0]
90
+ expect(Regexp.new(root.to_s, Regexp::EXTENDED).match(str)[0]).to eql(multiline.match(str)[0])
99
91
  end
100
92
 
101
93
  # special case: implicit groups used for chained quantifiers produce no parens
102
94
  specify 'chained quantifiers #to_s' do
103
95
  pattern = /a+{1}{2}/
104
- root = RP.parse(pattern)
105
- expect(root.to_s).to eq 'a+{1}{2}'
96
+ root = parse_frozen(pattern)
97
+ expect(root.to_s).to eql('a+{1}{2}')
106
98
  end
107
99
 
108
100
  # regression test for https://github.com/ammar/regexp_parser/issues/74
109
101
  specify('non-ascii comment') do
110
102
  pattern = '(?x) 😋 # 😋'
111
103
  root = RP.parse(pattern)
112
- expect(root.last).to be_a Regexp::Expression::Comment
113
- expect(root.last.to_s).to eq '# 😋'
114
- expect(root.to_s).to eq pattern
104
+ expect(root.last).to be_a(Regexp::Expression::Comment)
105
+ expect(root.last.to_s).to eql('# 😋')
106
+ expect(root.to_s).to eql(pattern)
115
107
  end
116
108
  end
@@ -32,6 +32,11 @@ RSpec.describe('RefCall lexing') do
32
32
  include_examples 'lex', "(abc)\\g'1'",
33
33
  3 => [:backref, :number_call, "\\g'1'", 5, 10, 0, 0, 0]
34
34
 
35
+ include_examples 'lex', '\g<0>',
36
+ 0 => [:backref, :number_call, '\g<0>', 0, 5, 0, 0, 0]
37
+ include_examples 'lex', "\\g'0'",
38
+ 0 => [:backref, :number_call, "\\g'0'", 0, 5, 0, 0, 0]
39
+
35
40
  include_examples 'lex', '(abc)\g<-1>',
36
41
  3 => [:backref, :number_rel_call, '\g<-1>', 5, 11, 0, 0, 0]
37
42
  include_examples 'lex', "(abc)\\g'-1'",
@@ -34,10 +34,10 @@ RSpec.describe(Regexp::Parser) do
34
34
  end
35
35
 
36
36
  specify('parse no quantifier target raises error') do
37
- expect { RP.parse('?abc') }.to raise_error(ArgumentError)
37
+ expect { RP.parse('?abc') }.to raise_error(Regexp::Parser::Error)
38
38
  end
39
39
 
40
40
  specify('parse sequence no quantifier target raises error') do
41
- expect { RP.parse('abc|?def') }.to raise_error(ArgumentError)
41
+ expect { RP.parse('abc|?def') }.to raise_error(Regexp::Parser::Error)
42
42
  end
43
43
  end
@@ -9,7 +9,7 @@ RSpec.describe('Parsing errors') do
9
9
  .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
10
  end
11
11
 
12
- RSpec.shared_examples 'UnknownTokenError' do |type, token|
12
+ RSpec.shared_examples 'UnknownTokenError' do |type|
13
13
  it "raises for unkown tokens of type #{type}" do
14
14
  expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
15
  .to raise_error(Regexp::Parser::UnknownTokenError)
@@ -11,6 +11,7 @@ RSpec.describe('Quantifier parsing') do
11
11
  expect(exp.quantifier.min).to eq min
12
12
  expect(exp.quantifier.max).to eq max
13
13
  expect(exp.quantifier.mode).to eq mode
14
+ expect(exp.quantifier.text).to eq text
14
15
  end
15
16
  end
16
17
 
@@ -29,6 +29,11 @@ RSpec.describe('Refcall parsing') do
29
29
  include_examples 'parse', /(abc)\g'1'/,
30
30
  1 => [:backref, :number_call, Backreference::NumberCall, number: 1]
31
31
 
32
+ include_examples 'parse', '\g<0>',
33
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
34
+ include_examples 'parse', "\\g'0'",
35
+ 0 => [:backref, :number_call, Backreference::NumberCall, number: 0]
36
+
32
37
  include_examples 'parse', /(abc)\g<-1>/,
33
38
  1 => [:backref, :number_rel_call, Backreference::NumberCallRelative, number: -1]
34
39
  include_examples 'parse', /(abc)\g'-1'/,
@@ -4,7 +4,7 @@ RSpec.describe('Escape scanning') do
4
4
  include_examples 'scan', /c\at/, 1 => [:escape, :bell, '\a', 1, 3]
5
5
 
6
6
  # not an escape outside a character set
7
- include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
7
+ include_examples 'scan', /c\bt/, 1 => [:anchor, :word_boundary, '\b', 1, 3]
8
8
 
9
9
  include_examples 'scan', /c\ft/, 1 => [:escape, :form_feed, '\f', 1, 3]
10
10
  include_examples 'scan', /c\nt/, 1 => [:escape, :newline, '\n', 1, 3]
@@ -27,6 +27,7 @@ RSpec.describe('Escape scanning') do
27
27
  include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5]
28
28
  include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]
29
29
 
30
+ include_examples 'scan', 'a\xA', 1 => [:escape, :hex, '\xA', 1, 4]
30
31
  include_examples 'scan', 'a\x24c', 1 => [:escape, :hex, '\x24', 1, 5]
31
32
  include_examples 'scan', 'a\x0640c', 1 => [:escape, :hex, '\x06', 1, 5]
32
33
 
@@ -5,11 +5,20 @@ RSpec.describe('Group scanning') do
5
5
  include_examples 'scan', '(?>abc)', 0 => [:group, :atomic, '(?>', 0, 3]
6
6
  include_examples 'scan', '(abc)', 0 => [:group, :capture, '(', 0, 1]
7
7
 
8
+ # Named groups
9
+ # only names that start with a hyphen or digit (ascii or other) are invalid
8
10
  include_examples 'scan', '(?<name>abc)', 0 => [:group, :named_ab, '(?<name>', 0, 8]
9
11
  include_examples 'scan', "(?'name'abc)", 0 => [:group, :named_sq, "(?'name'", 0, 8]
10
-
11
12
  include_examples 'scan', '(?<name_1>abc)', 0 => [:group, :named_ab, '(?<name_1>', 0,10]
12
13
  include_examples 'scan', "(?'name_1'abc)", 0 => [:group, :named_sq, "(?'name_1'", 0,10]
14
+ include_examples 'scan', '(?<name-1>abc)', 0 => [:group, :named_ab, '(?<name-1>', 0,10]
15
+ include_examples 'scan', "(?'name-1'abc)", 0 => [:group, :named_sq, "(?'name-1'", 0,10]
16
+ include_examples 'scan', "(?<name'1>abc)", 0 => [:group, :named_ab, "(?<name'1>", 0,10]
17
+ include_examples 'scan', "(?'name>1'abc)", 0 => [:group, :named_sq, "(?'name>1'", 0,10]
18
+ include_examples 'scan', '(?<üüuuüü>abc)', 0 => [:group, :named_ab, '(?<üüuuüü>', 0,10]
19
+ include_examples 'scan', "(?'üüuuüü'abc)", 0 => [:group, :named_sq, "(?'üüuuüü'", 0,10]
20
+ include_examples 'scan', "(?<😋1234😋>abc)", 0 => [:group, :named_ab, "(?<😋1234😋>", 0,10]
21
+ include_examples 'scan', "(?'😋1234😋'abc)", 0 => [:group, :named_sq, "(?'😋1234😋'", 0,10]
13
22
 
14
23
  include_examples 'scan', '(?:abc)', 0 => [:group, :passive, '(?:', 0, 3]
15
24
  include_examples 'scan', '(?:)', 0 => [:group, :passive, '(?:', 0, 3]
@@ -5,9 +5,19 @@ RSpec.describe('RefCall scanning') do
5
5
  include_examples 'scan', '(abc)\1' , 3 => [:backref, :number, '\1', 5, 7]
6
6
 
7
7
  # Group back-references, named, numbered, and relative
8
+ #
9
+ # NOTE: only \g supports forward-looking references using '+', e.g. \g<+1>
10
+ # refers to the next group, but \k<+1> refers to a group named '+1'.
11
+ # Inversely, only \k supports addition or subtraction of a recursion level.
12
+ # E.g. \k<x+0> refers to a group named 'x' at the current recursion level,
13
+ # but \g<x+0> refers to a group named 'x+0'.
14
+ #
8
15
  include_examples 'scan', '(?<X>abc)\k<X>', 3 => [:backref, :name_ref_ab, '\k<X>', 9, 14]
9
16
  include_examples 'scan', "(?<X>abc)\\k'X'", 3 => [:backref, :name_ref_sq, "\\k'X'", 9, 14]
10
17
 
18
+ include_examples 'scan', '(?<+1>abc)\k<+1>', 3 => [:backref, :name_ref_ab, '\k<+1>', 10, 16]
19
+ include_examples 'scan', "(?<+1>abc)\\k'+1'", 3 => [:backref, :name_ref_sq, "\\k'+1'", 10, 16]
20
+
11
21
  include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10]
12
22
  include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10]
13
23
 
@@ -18,9 +28,15 @@ RSpec.describe('RefCall scanning') do
18
28
  include_examples 'scan', '(?<X>abc)\g<X>', 3 => [:backref, :name_call_ab, '\g<X>', 9, 14]
19
29
  include_examples 'scan', "(?<X>abc)\\g'X'", 3 => [:backref, :name_call_sq, "\\g'X'", 9, 14]
20
30
 
31
+ include_examples 'scan', '(?<X>abc)\g<X-1>', 3 => [:backref, :name_call_ab, '\g<X-1>', 9, 16]
32
+ include_examples 'scan', "(?<X>abc)\\g'X-1'", 3 => [:backref, :name_call_sq, "\\g'X-1'", 9, 16]
33
+
21
34
  include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10]
22
35
  include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10]
23
36
 
37
+ include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9]
38
+ include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9]
39
+
24
40
  include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11]
25
41
  include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11]
26
42
 
@@ -33,4 +49,7 @@ RSpec.describe('RefCall scanning') do
33
49
 
34
50
  include_examples 'scan', '(abc)\k<1-0>', 3 => [:backref, :number_recursion_ref_ab, '\k<1-0>', 5, 12]
35
51
  include_examples 'scan', "(abc)\\k'1-0'", 3 => [:backref, :number_recursion_ref_sq, "\\k'1-0'", 5, 12]
52
+
53
+ include_examples 'scan', '(abc)\k<+1-0>', 3 => [:backref, :name_recursion_ref_ab, '\k<+1-0>', 5, 13]
54
+ include_examples 'scan', "(abc)\\k'+1-0'", 3 => [:backref, :name_recursion_ref_sq, "\\k'+1-0'", 5, 13]
36
55
  end
@@ -6,8 +6,18 @@ RSpec.describe('Set scanning') do
6
6
  include_examples 'scan', /[^n]/, 1 => [:set, :negate, '^', 1, 2]
7
7
 
8
8
  include_examples 'scan', /[c]/, 1 => [:literal, :literal, 'c', 1, 2]
9
- include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
10
- include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
9
+ include_examples 'scan', /[^d]/, 2 => [:literal, :literal, 'd', 2, 3]
10
+
11
+ include_examples 'scan', /[\b]/, 1 => [:escape, :backspace, '\b', 1, 3]
12
+ include_examples 'scan', /[A\bX]/, 2 => [:escape, :backspace, '\b', 2, 4]
13
+
14
+ include_examples 'scan', /[\a]/, 1 => [:escape, :bell, '\a', 1, 3]
15
+ include_examples 'scan', /[\e]/, 1 => [:escape, :escape, '\e', 1, 3]
16
+ include_examples 'scan', /[\f]/, 1 => [:escape, :form_feed, '\f', 1, 3]
17
+ include_examples 'scan', /[\n]/, 1 => [:escape, :newline, '\n', 1, 3]
18
+ include_examples 'scan', /[\r]/, 1 => [:escape, :carriage, '\r', 1, 3]
19
+ include_examples 'scan', /[\t]/, 1 => [:escape, :tab, '\t', 1, 3]
20
+ include_examples 'scan', /[\v]/, 1 => [:escape, :vertical_tab, '\v', 1, 3]
11
21
 
12
22
  include_examples 'scan', /[.]/, 1 => [:literal, :literal, '.', 1, 2]
13
23
  include_examples 'scan', /[?]/, 1 => [:literal, :literal, '?', 1, 2]
@@ -18,22 +28,36 @@ RSpec.describe('Set scanning') do
18
28
  include_examples 'scan', /[<]/, 1 => [:literal, :literal, '<', 1, 2]
19
29
  include_examples 'scan', /[>]/, 1 => [:literal, :literal, '>', 1, 2]
20
30
 
21
- include_examples 'scan', /[\x20]/, 1 => [:escape, :hex, '\x20', 1, 5]
22
-
23
- include_examples 'scan', '[\.]', 1 => [:escape, :dot, '\.', 1, 3]
31
+ include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
32
+ include_examples 'scan', '[\u0040]', 1 => [:escape, :codepoint, '\u0040', 1, 7]
33
+ include_examples 'scan', '[\u{40}]', 1 => [:escape, :codepoint_list, '\u{40}', 1, 7]
34
+ include_examples 'scan', '[\c2]', 1 => [:escape, :control, '\c2', 1, 4]
35
+ include_examples 'scan', '[\C-C]', 1 => [:escape, :control, '\C-C', 1, 5]
36
+ include_examples 'scan', '[\x20]', 1 => [:escape, :hex, '\x20', 1, 5]
37
+ include_examples 'scan', '[\M-Z]', 1 => [:escape, :meta_sequence, '\M-Z', 1, 5]
38
+ include_examples 'scan', '[\M-\C-X]', 1 => [:escape, :meta_sequence, '\M-\C-X', 1, 8]
39
+ include_examples 'scan', '[\\[]', 1 => [:escape, :set_open, '\[', 1, 3]
40
+ include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
41
+ include_examples 'scan', '[a\-]', 2 => [:escape, :literal, '\-', 2, 4]
42
+ include_examples 'scan', '[\-c]', 1 => [:escape, :literal, '\-', 1, 3]
43
+ include_examples 'scan', '[\.]', 1 => [:escape, :literal, '\.', 1, 3]
44
+ include_examples 'scan', '[\?]', 1 => [:escape, :literal, '\?', 1, 3]
45
+ include_examples 'scan', '[\*]', 1 => [:escape, :literal, '\*', 1, 3]
46
+ include_examples 'scan', '[\+]', 1 => [:escape, :literal, '\+', 1, 3]
47
+ include_examples 'scan', '[\|]', 1 => [:escape, :literal, '\|', 1, 3]
48
+ include_examples 'scan', '[\{]', 1 => [:escape, :literal, '\{', 1, 3]
49
+ include_examples 'scan', '[\}]', 1 => [:escape, :literal, '\}', 1, 3]
50
+ include_examples 'scan', '[\(]', 1 => [:escape, :literal, '\(', 1, 3]
51
+ include_examples 'scan', '[\)]', 1 => [:escape, :literal, '\)', 1, 3]
24
52
  include_examples 'scan', '[\!]', 1 => [:escape, :literal, '\!', 1, 3]
25
53
  include_examples 'scan', '[\#]', 1 => [:escape, :literal, '\#', 1, 3]
26
- include_examples 'scan', '[\\]]', 1 => [:escape, :set_close, '\]', 1, 3]
27
- include_examples 'scan', '[\\\\]', 1 => [:escape, :backslash, '\\\\', 1, 3]
28
54
  include_examples 'scan', '[\A]', 1 => [:escape, :literal, '\A', 1, 3]
29
55
  include_examples 'scan', '[\z]', 1 => [:escape, :literal, '\z', 1, 3]
30
56
  include_examples 'scan', '[\g]', 1 => [:escape, :literal, '\g', 1, 3]
31
57
  include_examples 'scan', '[\K]', 1 => [:escape, :literal, '\K', 1, 3]
32
58
  include_examples 'scan', '[\R]', 1 => [:escape, :literal, '\R', 1, 3]
33
59
  include_examples 'scan', '[\X]', 1 => [:escape, :literal, '\X', 1, 3]
34
- include_examples 'scan', '[\c2]', 1 => [:escape, :literal, '\c', 1, 3]
35
60
  include_examples 'scan', '[\B]', 1 => [:escape, :literal, '\B', 1, 3]
36
- include_examples 'scan', '[a\-c]', 2 => [:escape, :literal, '\-', 2, 4]
37
61
 
38
62
  include_examples 'scan', /[\d]/, 1 => [:type, :digit, '\d', 1, 3]
39
63
  include_examples 'scan', /[\da-z]/, 1 => [:type, :digit, '\d', 1, 3]
@@ -54,19 +78,23 @@ RSpec.describe('Set scanning') do
54
78
  include_examples 'scan', /[a-b-]/, 4 => [:literal, :literal, '-', 4, 5]
55
79
  include_examples 'scan', /[-a]/, 1 => [:literal, :literal, '-', 1, 2]
56
80
  include_examples 'scan', /[a-c^]/, 4 => [:literal, :literal, '^', 4, 5]
57
- include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
58
- include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
81
+ include_examples 'scan', /[a-bd-f]/, 2 => [:set, :range, '-', 2, 3]
82
+ include_examples 'scan', /[a-cd-f]/, 5 => [:set, :range, '-', 5, 6]
83
+ # this is a buggy range, it matches only `c`, but not `a`, `b` or `-`
84
+ include_examples 'scan', /[a-[c]]/, 2 => [:set, :range, '-', 2, 3]
85
+ # these are not ranges, they match `a`, `c` and `-` (or non-`-` if negated)
86
+ include_examples 'scan', /[[a]-[c]]/, 4 => [:literal, :literal, '-', 4, 5]
87
+ include_examples 'scan', /[[a]-c]/, 4 => [:literal, :literal, '-', 4, 5]
88
+ include_examples 'scan', /[^-c]/, 2 => [:literal, :literal, '-', 2, 3]
59
89
 
60
90
  include_examples 'scan', /[a[:digit:]c]/, 2 => [:posixclass, :digit, '[:digit:]', 2, 11]
61
91
  include_examples 'scan', /[[:digit:][:space:]]/, 2 => [:posixclass, :space, '[:space:]', 10, 19]
62
92
  include_examples 'scan', /[[:^digit:]]/, 1 => [:nonposixclass, :digit, '[:^digit:]', 1, 11]
63
93
 
64
- include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
65
- include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
66
-
67
94
  include_examples 'scan', /[a-d&&g-h]/, 4 => [:set, :intersection, '&&', 4, 6]
68
95
  include_examples 'scan', /[a&&]/, 2 => [:set, :intersection, '&&', 2, 4]
69
96
  include_examples 'scan', /[&&z]/, 1 => [:set, :intersection, '&&', 1, 3]
97
+ include_examples 'scan', /[&&]/, 1 => [:set, :intersection, '&&', 1, 3]
70
98
 
71
99
  include_examples 'scan', /[a\p{digit}c]/, 2 => [:property, :digit, '\p{digit}', 2, 11]
72
100
  include_examples 'scan', /[a\P{digit}c]/, 2 => [:nonproperty, :digit, '\P{digit}', 2, 11]
@@ -88,6 +116,21 @@ RSpec.describe('Set scanning') do
88
116
  8 => [:set, :range, '-', 9, 10],
89
117
  10=> [:set, :close, ']', 11, 12]
90
118
 
119
+ # Collations/collating sequences and character equivalents are not enabled
120
+ # in Ruby at the moment. If they ever are, enable them in the scanner,
121
+ # add them to a new syntax version, and handle them in the parser. Until then,
122
+ # expect them to be scanned as regular subsets containing literals.
123
+ # include_examples 'scan', /[a[.a-b.]c]/, 2 => [:set, :collation, '[.a-b.]', 2, 9]
124
+ # include_examples 'scan', /[a[=e=]c]/, 2 => [:set, :equivalent, '[=e=]', 2, 7]
125
+ include_examples 'scan', '[a[.a-b.]c]',
126
+ 2 => [:set, :open, '[', 2, 3],
127
+ 3 => [:literal, :literal, '.', 3, 4],
128
+ 4 => [:literal, :literal, 'a', 4, 5]
129
+ include_examples 'scan', '[a[=e=]c]',
130
+ 2 => [:set, :open, '[', 2, 3],
131
+ 3 => [:literal, :literal, '=', 3, 4],
132
+ 4 => [:literal, :literal, 'e', 4, 5]
133
+
91
134
  # multi-byte characters should not affect indices
92
135
  include_examples 'scan', /[れます]/,
93
136
  0 => [:set, :open, '[', 0, 1],
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'ice_nine'
1
2
  require 'regexp_parser'
2
3
  require 'regexp_property_values'
3
4
  require_relative 'support/shared_examples'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ammar Ali
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-11-25 00:00:00.000000000 Z
11
+ date: 2021-02-23 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: A library for tokenizing, lexing, and parsing Ruby regular expressions.
14
14
  email:
@@ -23,6 +23,7 @@ files:
23
23
  - README.md
24
24
  - Rakefile
25
25
  - lib/regexp_parser.rb
26
+ - lib/regexp_parser/error.rb
26
27
  - lib/regexp_parser/expression.rb
27
28
  - lib/regexp_parser/expression/classes/alternation.rb
28
29
  - lib/regexp_parser/expression/classes/anchor.rb
@@ -183,7 +184,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
184
  - !ruby/object:Gem::Version
184
185
  version: '0'
185
186
  requirements: []
186
- rubygems_version: 3.2.0.rc.1
187
+ rubygems_version: 3.2.3
187
188
  signing_key:
188
189
  specification_version: 4
189
190
  summary: Scanner, lexer, parser for ruby's regular expressions