regexp_parser 1.5.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/lib/regexp_parser/expression.rb +6 -43
  4. data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
  5. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  6. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  7. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  8. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  9. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  10. data/lib/regexp_parser/expression/sequence.rb +3 -2
  11. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  12. data/lib/regexp_parser/lexer.rb +0 -21
  13. data/lib/regexp_parser/parser.rb +22 -21
  14. data/lib/regexp_parser/scanner.rb +1159 -1329
  15. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  16. data/lib/regexp_parser/scanner/scanner.rl +82 -190
  17. data/lib/regexp_parser/version.rb +1 -1
  18. data/spec/expression/base_spec.rb +14 -0
  19. data/spec/expression/methods/match_length_spec.rb +13 -0
  20. data/spec/expression/methods/match_spec.rb +25 -0
  21. data/spec/expression/methods/tests_spec.rb +2 -0
  22. data/spec/expression/options_spec.rb +128 -0
  23. data/spec/expression/root_spec.rb +9 -0
  24. data/spec/expression/sequence_spec.rb +9 -0
  25. data/spec/lexer/conditionals_spec.rb +49 -119
  26. data/spec/lexer/escapes_spec.rb +8 -32
  27. data/spec/lexer/keep_spec.rb +5 -17
  28. data/spec/lexer/literals_spec.rb +73 -110
  29. data/spec/lexer/nesting_spec.rb +86 -117
  30. data/spec/lexer/refcalls_spec.rb +51 -50
  31. data/spec/parser/all_spec.rb +13 -1
  32. data/spec/parser/anchors_spec.rb +9 -23
  33. data/spec/parser/conditionals_spec.rb +9 -9
  34. data/spec/parser/errors_spec.rb +22 -43
  35. data/spec/parser/escapes_spec.rb +33 -44
  36. data/spec/parser/groups_spec.rb +98 -257
  37. data/spec/parser/keep_spec.rb +2 -15
  38. data/spec/parser/posix_classes_spec.rb +5 -24
  39. data/spec/parser/properties_spec.rb +42 -54
  40. data/spec/parser/quantifiers_spec.rb +41 -283
  41. data/spec/parser/refcalls_spec.rb +60 -185
  42. data/spec/parser/set/intersections_spec.rb +17 -17
  43. data/spec/parser/set/ranges_spec.rb +17 -17
  44. data/spec/parser/sets_spec.rb +5 -5
  45. data/spec/parser/types_spec.rb +11 -36
  46. data/spec/scanner/anchors_spec.rb +13 -28
  47. data/spec/scanner/conditionals_spec.rb +121 -173
  48. data/spec/scanner/errors_spec.rb +65 -87
  49. data/spec/scanner/escapes_spec.rb +49 -50
  50. data/spec/scanner/free_space_spec.rb +102 -165
  51. data/spec/scanner/groups_spec.rb +45 -64
  52. data/spec/scanner/keep_spec.rb +5 -28
  53. data/spec/scanner/literals_spec.rb +45 -81
  54. data/spec/scanner/meta_spec.rb +13 -33
  55. data/spec/scanner/properties_spec.rb +43 -286
  56. data/spec/scanner/quantifiers_spec.rb +13 -28
  57. data/spec/scanner/refcalls_spec.rb +32 -48
  58. data/spec/scanner/sets_spec.rb +88 -102
  59. data/spec/scanner/types_spec.rb +10 -25
  60. data/spec/spec_helper.rb +1 -0
  61. data/spec/support/shared_examples.rb +77 -0
  62. data/spec/syntax/syntax_spec.rb +4 -0
  63. data/spec/syntax/versions/1.8.6_spec.rb +12 -33
  64. data/spec/syntax/versions/1.9.1_spec.rb +5 -18
  65. data/spec/syntax/versions/1.9.3_spec.rb +4 -17
  66. data/spec/syntax/versions/2.0.0_spec.rb +8 -23
  67. data/spec/syntax/versions/2.2.0_spec.rb +4 -17
  68. data/spec/syntax/versions/aliases_spec.rb +25 -109
  69. metadata +14 -6
  70. data/spec/scanner/scripts_spec.rb +0 -49
  71. data/spec/scanner/unicode_blocks_spec.rb +0 -28
data/spec/parser/types_spec.rb +11 -36
@@ -1,43 +1,18 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('Type parsing') do
4
- tests = {
5
- /a\dc/ => [1, :type, :digit, CharacterType::Digit],
6
- /a\Dc/ => [1, :type, :nondigit, CharacterType::NonDigit],
3
+ RSpec.describe('CharacterType parsing') do
4
+ include_examples 'parse', /a\dc/, 1 => [:type, :digit, CharacterType::Digit]
5
+ include_examples 'parse', /a\Dc/, 1 => [:type, :nondigit, CharacterType::NonDigit]
7
6
 
8
- /a\sc/ => [1, :type, :space, CharacterType::Space],
9
- /a\Sc/ => [1, :type, :nonspace, CharacterType::NonSpace],
7
+ include_examples 'parse', /a\sc/, 1 => [:type, :space, CharacterType::Space]
8
+ include_examples 'parse', /a\Sc/, 1 => [:type, :nonspace, CharacterType::NonSpace]
10
9
 
11
- /a\hc/ => [1, :type, :hex, CharacterType::Hex],
12
- /a\Hc/ => [1, :type, :nonhex, CharacterType::NonHex],
10
+ include_examples 'parse', /a\hc/, 1 => [:type, :hex, CharacterType::Hex]
11
+ include_examples 'parse', /a\Hc/, 1 => [:type, :nonhex, CharacterType::NonHex]
13
12
 
14
- /a\wc/ => [1, :type, :word, CharacterType::Word],
15
- /a\Wc/ => [1, :type, :nonword, CharacterType::NonWord],
16
- }
13
+ include_examples 'parse', /a\wc/, 1 => [:type, :word, CharacterType::Word]
14
+ include_examples 'parse', /a\Wc/, 1 => [:type, :nonword, CharacterType::NonWord]
17
15
 
18
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
19
- specify("parse_type_#{token}_#{count}") do
20
- root = RP.parse(pattern, 'ruby/1.9')
21
- exp = root.expressions.at(index)
22
-
23
- expect(exp).to be_a(klass)
24
-
25
- expect(exp.type).to eq type
26
- expect(exp.token).to eq token
27
- end
28
- end
29
-
30
- tests_2_0 = { 'a\\Rc' => [1, :type, :linebreak, CharacterType::Linebreak], 'a\\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme] }
31
-
32
- tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
33
- specify("parse_type_#{token}_#{count}") do
34
- root = RP.parse(pattern, 'ruby/2.0')
35
- exp = root.expressions.at(index)
36
-
37
- expect(exp).to be_a(klass)
38
-
39
- expect(exp.type).to eq type
40
- expect(exp.token).to eq token
41
- end
42
- end
16
+ include_examples 'parse', 'a\\Rc', 1 => [:type, :linebreak, CharacterType::Linebreak]
17
+ include_examples 'parse', 'a\\Xc', 1 => [:type, :xgrapheme, CharacterType::ExtendedGrapheme]
43
18
  end
data/spec/scanner/anchors_spec.rb +13 -28
@@ -1,36 +1,21 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Anchor scanning') do
4
- tests = {
5
- '^abc' => [0, :anchor, :bol, '^', 0, 1],
6
- 'abc$' => [1, :anchor, :eol, '$', 3, 4],
4
+ include_examples 'scan', '^abc', 0 => [:anchor, :bol, '^', 0, 1]
5
+ include_examples 'scan', 'abc$', 1 => [:anchor, :eol, '$', 3, 4]
7
6
 
8
- '\Aabc' => [0, :anchor, :bos, '\A', 0, 2],
9
- 'abc\z' => [1, :anchor, :eos, '\z', 3, 5],
10
- 'abc\Z' => [1, :anchor, :eos_ob_eol, '\Z', 3, 5],
7
+ include_examples 'scan', '\Aabc', 0 => [:anchor, :bos, '\A', 0, 2]
8
+ include_examples 'scan', 'abc\z', 1 => [:anchor, :eos, '\z', 3, 5]
9
+ include_examples 'scan', 'abc\Z', 1 => [:anchor, :eos_ob_eol, '\Z', 3, 5]
11
10
 
12
- 'a\bc' => [1, :anchor, :word_boundary, '\b', 1, 3],
13
- 'a\Bc' => [1, :anchor, :nonword_boundary, '\B', 1, 3],
11
+ include_examples 'scan', 'a\bc', 1 => [:anchor, :word_boundary, '\b', 1, 3]
12
+ include_examples 'scan', 'a\Bc', 1 => [:anchor, :nonword_boundary, '\B', 1, 3]
14
13
 
15
- 'a\Gc' => [1, :anchor, :match_start, '\G', 1, 3],
14
+ include_examples 'scan', 'a\Gc', 1 => [:anchor, :match_start, '\G', 1, 3]
16
15
 
17
- "\\\\Ac" => [0, :escape, :backslash, '\\\\', 0, 2],
18
- "a\\\\z" => [1, :escape, :backslash, '\\\\', 1, 3],
19
- "a\\\\Z" => [1, :escape, :backslash, '\\\\', 1, 3],
20
- "a\\\\bc" => [1, :escape, :backslash, '\\\\', 1, 3],
21
- "a\\\\Bc" => [1, :escape, :backslash, '\\\\', 1, 3],
22
- }
23
-
24
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
25
- specify("scanner_#{type}_#{token}_#{count}") do
26
- tokens = RS.scan(pattern)
27
- result = tokens[index]
28
-
29
- expect(result[0]).to eq type
30
- expect(result[1]).to eq token
31
- expect(result[2]).to eq text
32
- expect(result[3]).to eq ts
33
- expect(result[4]).to eq te
34
- end
35
- end
16
+ include_examples 'scan', "\\\\Ac", 0 => [:escape, :backslash, '\\\\', 0, 2]
17
+ include_examples 'scan', "a\\\\z", 1 => [:escape, :backslash, '\\\\', 1, 3]
18
+ include_examples 'scan', "a\\\\Z", 1 => [:escape, :backslash, '\\\\', 1, 3]
19
+ include_examples 'scan', "a\\\\bc", 1 => [:escape, :backslash, '\\\\', 1, 3]
20
+ include_examples 'scan', "a\\\\Bc", 1 => [:escape, :backslash, '\\\\', 1, 3]
36
21
  end
data/spec/scanner/conditionals_spec.rb +121 -173
@@ -1,180 +1,128 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Conditional scanning') do
4
- tests = {
5
- '(a)(?(1)T|F)1' => [3, :conditional, :open, '(?', 3, 5],
6
- '(a)(?(1)T|F)2' => [4, :conditional, :condition_open, '(', 5, 6],
7
- '(a)(?(1)T|F)3' => [5, :conditional, :condition, '1', 6, 7],
8
- '(a)(?(1)T|F)4' => [6, :conditional, :condition_close, ')', 7, 8],
9
- '(a)(?(1)T|F)5' => [7, :literal, :literal, 'T', 8, 9],
10
- '(a)(?(1)T|F)6' => [8, :conditional, :separator, '|', 9, 10],
11
- '(a)(?(1)T|F)7' => [9, :literal, :literal, 'F', 10, 11],
12
- '(a)(?(1)T|F)8' => [10, :conditional, :close, ')', 11, 12],
13
- '(a)(?(1)TRUE)9' => [8, :conditional, :close, ')', 12, 13],
14
- '(a)(?(1)TRUE|)10' => [8, :conditional, :separator, '|', 12, 13],
15
- '(a)(?(1)TRUE|)11' => [9, :conditional, :close, ')', 13, 14],
16
- '(?<N>A)(?(<N>)T|F)1' => [5, :conditional, :condition, '<N>', 10, 13],
17
- "(?'N'A)(?('N')T|F)2" => [5, :conditional, :condition, "'N'", 10, 13]
18
- }
4
+ include_examples 'scan', /(a)(?(1)T|F)1/, 3 => [:conditional, :open, '(?', 3, 5]
5
+ include_examples 'scan', /(a)(?(1)T|F)2/, 4 => [:conditional, :condition_open, '(', 5, 6]
6
+ include_examples 'scan', /(a)(?(1)T|F)3/, 5 => [:conditional, :condition, '1', 6, 7]
7
+ include_examples 'scan', /(a)(?(1)T|F)4/, 6 => [:conditional, :condition_close, ')', 7, 8]
8
+ include_examples 'scan', /(a)(?(1)T|F)5/, 7 => [:literal, :literal, 'T', 8, 9]
9
+ include_examples 'scan', /(a)(?(1)T|F)6/, 8 => [:conditional, :separator, '|', 9, 10]
10
+ include_examples 'scan', /(a)(?(1)T|F)7/, 9 => [:literal, :literal, 'F', 10, 11]
11
+ include_examples 'scan', /(a)(?(1)T|F)8/, 10 => [:conditional, :close, ')', 11, 12]
12
+ include_examples 'scan', /(a)(?(1)TRUE)9/, 8 => [:conditional, :close, ')', 12, 13]
13
+ include_examples 'scan', /(a)(?(1)TRUE|)10/, 8 => [:conditional, :separator, '|', 12, 13]
14
+ include_examples 'scan', /(a)(?(1)TRUE|)11/, 9 => [:conditional, :close, ')', 13, 14]
15
+ include_examples 'scan', /(?<N>A)(?(<N>)T|F)1/, 5 => [:conditional, :condition, '<N>', 10, 13]
16
+ include_examples 'scan', /(?'N'A)(?('N')T|F)2/, 5 => [:conditional, :condition, "'N'", 10, 13]
19
17
 
20
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
21
- specify("scanner_#{type}_#{token}_#{count}") do
22
- tokens = RS.scan(pattern)
23
- result = tokens[index]
18
+ include_examples 'scan', /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/,
19
+ 0 => [:group, :capture, '(', 0, 1],
20
+ 1 => [:literal, :literal, 'a', 1, 2],
21
+ 2 => [:group, :capture, '(', 2, 3],
22
+ 3 => [:literal, :literal, 'b', 3, 4],
23
+ 4 => [:group, :capture, '(', 4, 5],
24
+ 5 => [:literal, :literal, 'c', 5, 6],
25
+ 6 => [:group, :close, ')', 6, 7],
26
+ 7 => [:group, :close, ')', 7, 8],
27
+ 8 => [:group, :close, ')', 8, 9],
28
+ 9 => [:conditional, :open, '(?', 9, 11],
29
+ 10 => [:conditional, :condition_open, '(', 11, 12],
30
+ 11 => [:conditional, :condition, '1', 12, 13],
31
+ 12 => [:conditional, :condition_close, ')', 13, 14],
32
+ 13 => [:conditional, :open, '(?', 14, 16],
33
+ 14 => [:conditional, :condition_open, '(', 16, 17],
34
+ 15 => [:conditional, :condition, '2', 17, 18],
35
+ 16 => [:conditional, :condition_close, ')', 18, 19],
36
+ 17 => [:literal, :literal, 'd', 19, 20],
37
+ 18 => [:conditional, :separator, '|', 20, 21],
38
+ 19 => [:conditional, :open, '(?', 21, 23],
39
+ 20 => [:conditional, :condition_open, '(', 23, 24],
40
+ 21 => [:conditional, :condition, '3', 24, 25],
41
+ 22 => [:conditional, :condition_close, ')', 25, 26],
42
+ 23 => [:literal, :literal, 'e', 26, 27],
43
+ 24 => [:conditional, :separator, '|', 27, 28],
44
+ 25 => [:literal, :literal, 'f', 28, 29],
45
+ 26 => [:conditional, :close, ')', 29, 30],
46
+ 27 => [:conditional, :close, ')', 30, 31],
47
+ 28 => [:conditional, :separator, '|', 31, 32],
48
+ 29 => [:conditional, :open, '(?', 32, 34],
49
+ 30 => [:conditional, :condition_open, '(', 34, 35],
50
+ 31 => [:conditional, :condition, '2', 35, 36],
51
+ 32 => [:conditional, :condition_close, ')', 36, 37],
52
+ 33 => [:conditional, :open, '(?', 37, 39],
53
+ 34 => [:conditional, :condition_open, '(', 39, 40],
54
+ 35 => [:conditional, :condition, '1', 40, 41],
55
+ 36 => [:conditional, :condition_close, ')', 41, 42],
56
+ 37 => [:literal, :literal, 'g', 42, 43],
57
+ 38 => [:conditional, :separator, '|', 43, 44],
58
+ 39 => [:literal, :literal, 'h', 44, 45],
59
+ 40 => [:conditional, :close, ')', 45, 46],
60
+ 41 => [:conditional, :close, ')', 46, 47],
61
+ 42 => [:conditional, :close, ')', 47, 48]
24
62
 
25
- expect(result[0]).to eq type
26
- expect(result[1]).to eq token
27
- expect(result[2]).to eq text
28
- expect(result[3]).to eq ts
29
- expect(result[4]).to eq te
30
- end
31
- end
63
+ include_examples 'scan', /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/,
64
+ 0 => [:group, :capture, '(', 0, 1],
65
+ 1 => [:group, :capture, '(', 1, 2],
66
+ 2 => [:literal, :literal, 'a', 2, 3],
67
+ 3 => [:group, :close, ')', 3, 4],
68
+ 4 => [:meta, :alternation, '|', 4, 5],
69
+ 5 => [:group, :capture, '(', 5, 6],
70
+ 6 => [:literal, :literal, 'b', 6, 7],
71
+ 7 => [:group, :close, ')', 7, 8],
72
+ 8 => [:meta, :alternation, '|', 8, 9],
73
+ 9 => [:group, :capture, '(', 9, 10],
74
+ 10 => [:conditional, :open, '(?', 10, 12],
75
+ 11 => [:conditional, :condition_open, '(', 12, 13],
76
+ 12 => [:conditional, :condition, '2', 13, 14],
77
+ 13 => [:conditional, :condition_close, ')', 14, 15],
78
+ 14 => [:group, :capture, '(', 15, 16],
79
+ 15 => [:literal, :literal, 'c', 16, 17],
80
+ 16 => [:group, :capture, '(', 17, 18],
81
+ 17 => [:literal, :literal, 'd', 18, 19],
82
+ 18 => [:meta, :alternation, '|', 19, 20],
83
+ 19 => [:literal, :literal, 'e', 20, 21],
84
+ 20 => [:group, :close, ')', 21, 22],
85
+ 21 => [:quantifier, :one_or_more, '+', 22, 23],
86
+ 22 => [:group, :close, ')', 23, 24],
87
+ 23 => [:quantifier, :zero_or_one, '?', 24, 25],
88
+ 24 => [:conditional, :separator, '|', 25, 26],
89
+ 25 => [:conditional, :open, '(?', 26, 28],
90
+ 26 => [:conditional, :condition_open, '(', 28, 29],
91
+ 27 => [:conditional, :condition, '3', 29, 30],
92
+ 28 => [:conditional, :condition_close, ')', 30, 31],
93
+ 29 => [:literal, :literal, 'f', 31, 32],
94
+ 30 => [:conditional, :separator, '|', 32, 33],
95
+ 31 => [:conditional, :open, '(?', 33, 35],
96
+ 32 => [:conditional, :condition_open, '(', 35, 36],
97
+ 33 => [:conditional, :condition, '4', 36, 37],
98
+ 34 => [:conditional, :condition_close, ')', 37, 38],
99
+ 35 => [:group, :capture, '(', 38, 39],
100
+ 36 => [:literal, :literal, 'g', 39, 40],
101
+ 37 => [:meta, :alternation, '|', 40, 41],
102
+ 38 => [:group, :capture, '(', 41, 42],
103
+ 39 => [:literal, :literal, 'h', 42, 43],
104
+ 40 => [:group, :close, ')', 43, 44],
105
+ 41 => [:group, :capture, '(', 44, 45],
106
+ 42 => [:literal, :literal, 'i', 45, 46],
107
+ 43 => [:group, :close, ')', 46, 47],
108
+ 44 => [:group, :close, ')', 47, 48],
109
+ 45 => [:conditional, :close, ')', 48, 49],
110
+ 46 => [:conditional, :close, ')', 49, 50],
111
+ 47 => [:conditional, :close, ')', 50, 51],
112
+ 48 => [:group, :close, ')', 51, 52],
113
+ 49 => [:group, :close, ')', 52, 53]
32
114
 
33
- specify('scan conditional nested') do
34
- regexp = '(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))'
35
- tokens = RS.scan(regexp)
36
-
37
- [
38
- [ 0, :group, :capture, '(', 0, 1],
39
- [ 1, :literal, :literal, 'a', 1, 2],
40
- [ 2, :group, :capture, '(', 2, 3],
41
- [ 3, :literal, :literal, 'b', 3, 4],
42
- [ 4, :group, :capture, '(', 4, 5],
43
- [ 5, :literal, :literal, 'c', 5, 6],
44
- [ 6, :group, :close, ')', 6, 7],
45
- [ 7, :group, :close, ')', 7, 8],
46
- [ 8, :group, :close, ')', 8, 9],
47
- [ 9, :conditional, :open, '(?', 9, 11],
48
- [10, :conditional, :condition_open, '(', 11, 12],
49
- [11, :conditional, :condition, '1', 12, 13],
50
- [12, :conditional, :condition_close, ')', 13, 14],
51
- [13, :conditional, :open, '(?', 14, 16],
52
- [14, :conditional, :condition_open, '(', 16, 17],
53
- [15, :conditional, :condition, '2', 17, 18],
54
- [16, :conditional, :condition_close, ')', 18, 19],
55
- [17, :literal, :literal, 'd', 19, 20],
56
- [18, :conditional, :separator, '|', 20, 21],
57
- [19, :conditional, :open, '(?', 21, 23],
58
- [20, :conditional, :condition_open, '(', 23, 24],
59
- [21, :conditional, :condition, '3', 24, 25],
60
- [22, :conditional, :condition_close, ')', 25, 26],
61
- [23, :literal, :literal, 'e', 26, 27],
62
- [24, :conditional, :separator, '|', 27, 28],
63
- [25, :literal, :literal, 'f', 28, 29],
64
- [26, :conditional, :close, ')', 29, 30],
65
- [27, :conditional, :close, ')', 30, 31],
66
- [28, :conditional, :separator, '|', 31, 32],
67
- [29, :conditional, :open, '(?', 32, 34],
68
- [30, :conditional, :condition_open, '(', 34, 35],
69
- [31, :conditional, :condition, '2', 35, 36],
70
- [32, :conditional, :condition_close, ')', 36, 37],
71
- [33, :conditional, :open, '(?', 37, 39],
72
- [34, :conditional, :condition_open, '(', 39, 40],
73
- [35, :conditional, :condition, '1', 40, 41],
74
- [36, :conditional, :condition_close, ')', 41, 42],
75
- [37, :literal, :literal, 'g', 42, 43],
76
- [38, :conditional, :separator, '|', 43, 44],
77
- [39, :literal, :literal, 'h', 44, 45],
78
- [40, :conditional, :close, ')', 45, 46],
79
- [41, :conditional, :close, ')', 46, 47],
80
- [42, :conditional, :close, ')', 47, 48]
81
- ].each do |index, type, token, text, ts, te|
82
- result = tokens[index]
83
-
84
- expect(result[0]).to eq type
85
- expect(result[1]).to eq token
86
- expect(result[2]).to eq text
87
- expect(result[3]).to eq ts
88
- expect(result[4]).to eq te
89
- end
90
- end
91
-
92
- specify('scan conditional nested groups') do
93
- regexp = '((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))'
94
- tokens = RS.scan(regexp)
95
-
96
- [
97
- [ 0, :group, :capture, '(', 0, 1],
98
- [ 1, :group, :capture, '(', 1, 2],
99
- [ 2, :literal, :literal, 'a', 2, 3],
100
- [ 3, :group, :close, ')', 3, 4],
101
- [ 4, :meta, :alternation, '|', 4, 5],
102
- [ 5, :group, :capture, '(', 5, 6],
103
- [ 6, :literal, :literal, 'b', 6, 7],
104
- [ 7, :group, :close, ')', 7, 8],
105
- [ 8, :meta, :alternation, '|', 8, 9],
106
- [ 9, :group, :capture, '(', 9, 10],
107
- [10, :conditional, :open, '(?', 10, 12],
108
- [11, :conditional, :condition_open, '(', 12, 13],
109
- [12, :conditional, :condition, '2', 13, 14],
110
- [13, :conditional, :condition_close, ')', 14, 15],
111
- [14, :group, :capture, '(', 15, 16],
112
- [15, :literal, :literal, 'c', 16, 17],
113
- [16, :group, :capture, '(', 17, 18],
114
- [17, :literal, :literal, 'd', 18, 19],
115
- [18, :meta, :alternation, '|', 19, 20],
116
- [19, :literal, :literal, 'e', 20, 21],
117
- [20, :group, :close, ')', 21, 22],
118
- [21, :quantifier, :one_or_more, '+', 22, 23],
119
- [22, :group, :close, ')', 23, 24],
120
- [23, :quantifier, :zero_or_one, '?', 24, 25],
121
- [24, :conditional, :separator, '|', 25, 26],
122
- [25, :conditional, :open, '(?', 26, 28],
123
- [26, :conditional, :condition_open, '(', 28, 29],
124
- [27, :conditional, :condition, '3', 29, 30],
125
- [28, :conditional, :condition_close, ')', 30, 31],
126
- [29, :literal, :literal, 'f', 31, 32],
127
- [30, :conditional, :separator, '|', 32, 33],
128
- [31, :conditional, :open, '(?', 33, 35],
129
- [32, :conditional, :condition_open, '(', 35, 36],
130
- [33, :conditional, :condition, '4', 36, 37],
131
- [34, :conditional, :condition_close, ')', 37, 38],
132
- [35, :group, :capture, '(', 38, 39],
133
- [36, :literal, :literal, 'g', 39, 40],
134
- [37, :meta, :alternation, '|', 40, 41],
135
- [38, :group, :capture, '(', 41, 42],
136
- [39, :literal, :literal, 'h', 42, 43],
137
- [40, :group, :close, ')', 43, 44],
138
- [41, :group, :capture, '(', 44, 45],
139
- [42, :literal, :literal, 'i', 45, 46],
140
- [43, :group, :close, ')', 46, 47],
141
- [44, :group, :close, ')', 47, 48],
142
- [45, :conditional, :close, ')', 48, 49],
143
- [46, :conditional, :close, ')', 49, 50],
144
- [47, :conditional, :close, ')', 50, 51],
145
- [48, :group, :close, ')', 51, 52],
146
- [49, :group, :close, ')', 52, 53]
147
- ].each do |index, type, token, text, ts, te|
148
- result = tokens[index]
149
-
150
- expect(result[0]).to eq type
151
- expect(result[1]).to eq token
152
- expect(result[2]).to eq text
153
- expect(result[3]).to eq ts
154
- expect(result[4]).to eq te
155
- end
156
- end
157
-
158
- specify('scan conditional nested alternation') do
159
- regexp = '(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p'
160
- tokens = RS.scan(regexp)
161
-
162
- [9, 11, 17, 19, 32, 34, 40, 42, 46, 48].each do |index|
163
- result = tokens[index]
164
-
165
- expect(result[0]).to eq :meta
166
- expect(result[1]).to eq :alternation
167
- expect(result[2]).to eq '|'
168
- expect((result[4] - result[3])).to eq 1
169
- end
170
-
171
- [14, 37].each do |index|
172
- result = tokens[index]
173
-
174
- expect(result[0]).to eq :conditional
175
- expect(result[1]).to eq :separator
176
- expect(result[2]).to eq '|'
177
- expect((result[4] - result[3])).to eq 1
178
- end
179
- end
115
+ include_examples 'scan', /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/,
116
+ 9 => [:meta, :alternation, '|', 10, 11],
117
+ 11 => [:meta, :alternation, '|', 12, 13],
118
+ 14 => [:conditional, :separator, '|', 15, 16],
119
+ 17 => [:meta, :alternation, '|', 18, 19],
120
+ 19 => [:meta, :alternation, '|', 20, 21],
121
+ 32 => [:meta, :alternation, '|', 34, 35],
122
+ 34 => [:meta, :alternation, '|', 36, 37],
123
+ 37 => [:conditional, :separator, '|', 39, 40],
124
+ 40 => [:meta, :alternation, '|', 42, 43],
125
+ 42 => [:meta, :alternation, '|', 44, 45],
126
+ 46 => [:meta, :alternation, '|', 48, 49],
127
+ 48 => [:meta, :alternation, '|', 50, 51]
180
128
  end
data/spec/scanner/errors_spec.rb +65 -87
@@ -1,90 +1,68 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('Scanning errors') do
4
- specify('scanner unbalanced set') do
5
- expect { RS.scan('[[:alpha:]') }.to raise_error(RS::PrematureEndError)
6
- end
7
-
8
- specify('scanner unbalanced group') do
9
- expect { RS.scan('(abc') }.to raise_error(RS::PrematureEndError)
10
- end
11
-
12
- specify('scanner unbalanced interval') do
13
- expect { RS.scan('a{1,2') }.to raise_error(RS::PrematureEndError)
14
- end
15
-
16
- specify('scanner eof in property') do
17
- expect { RS.scan('\\p{asci') }.to raise_error(RS::PrematureEndError)
18
- end
19
-
20
- specify('scanner incomplete property') do
21
- expect { RS.scan('\\p{ascii abc') }.to raise_error(RS::PrematureEndError)
22
- end
23
-
24
- specify('scanner unknown property') do
25
- expect { RS.scan('\\p{foobar}') }.to raise_error(RS::UnknownUnicodePropertyError)
26
- end
27
-
28
- specify('scanner incomplete options') do
29
- expect { RS.scan('(?mix abc)') }.to raise_error(RS::ScannerError)
30
- end
31
-
32
- specify('scanner eof options') do
33
- expect { RS.scan('(?mix') }.to raise_error(RS::PrematureEndError)
34
- end
35
-
36
- specify('scanner incorrect options') do
37
- expect { RS.scan('(?mix^bc') }.to raise_error(RS::ScannerError)
38
- end
39
-
40
- specify('scanner eof escape') do
41
- expect { RS.scan('\\') }.to raise_error(RS::PrematureEndError)
42
- end
43
-
44
- specify('scanner eof in hex escape') do
45
- expect { RS.scan('\\x') }.to raise_error(RS::PrematureEndError)
46
- end
47
-
48
- specify('scanner eof in codepoint escape') do
49
- expect { RS.scan('\\u') }.to raise_error(RS::PrematureEndError)
50
- expect { RS.scan('\\u0') }.to raise_error(RS::PrematureEndError)
51
- expect { RS.scan('\\u00') }.to raise_error(RS::PrematureEndError)
52
- expect { RS.scan('\\u000') }.to raise_error(RS::PrematureEndError)
53
- expect { RS.scan('\\u{') }.to raise_error(RS::PrematureEndError)
54
- expect { RS.scan('\\u{00') }.to raise_error(RS::PrematureEndError)
55
- expect { RS.scan('\\u{0000') }.to raise_error(RS::PrematureEndError)
56
- expect { RS.scan('\\u{0000 ') }.to raise_error(RS::PrematureEndError)
57
- expect { RS.scan('\\u{0000 0000') }.to raise_error(RS::PrematureEndError)
58
- end
59
-
60
- specify('scanner eof in control sequence') do
61
- expect { RS.scan('\\c') }.to raise_error(RS::PrematureEndError)
62
- expect { RS.scan('\\c\\M') }.to raise_error(RS::PrematureEndError)
63
- expect { RS.scan('\\c\\M-') }.to raise_error(RS::PrematureEndError)
64
- expect { RS.scan('\\C') }.to raise_error(RS::PrematureEndError)
65
- expect { RS.scan('\\C-') }.to raise_error(RS::PrematureEndError)
66
- expect { RS.scan('\\C-\\M') }.to raise_error(RS::PrematureEndError)
67
- expect { RS.scan('\\C-\\M-') }.to raise_error(RS::PrematureEndError)
68
- end
69
-
70
- specify('scanner eof in meta sequence') do
71
- expect { RS.scan('\\M') }.to raise_error(RS::PrematureEndError)
72
- expect { RS.scan('\\M-') }.to raise_error(RS::PrematureEndError)
73
- expect { RS.scan('\\M-\\') }.to raise_error(RS::PrematureEndError)
74
- expect { RS.scan('\\M-\\c') }.to raise_error(RS::PrematureEndError)
75
- expect { RS.scan('\\M-\\C') }.to raise_error(RS::PrematureEndError)
76
- expect { RS.scan('\\M-\\C-') }.to raise_error(RS::PrematureEndError)
77
- end
78
-
79
- specify('scanner invalid hex escape') do
80
- expect { RS.scan('\\xZ') }.to raise_error(RS::InvalidSequenceError)
81
- expect { RS.scan('\\xZ0') }.to raise_error(RS::InvalidSequenceError)
82
- end
83
-
84
- specify('scanner invalid named group') do
85
- expect { RS.scan("(?'')") }.to raise_error(RS::InvalidGroupError)
86
- expect { RS.scan("(?''empty-name)") }.to raise_error(RS::InvalidGroupError)
87
- expect { RS.scan('(?<>)') }.to raise_error(RS::InvalidGroupError)
88
- expect { RS.scan('(?<>empty-name)') }.to raise_error(RS::InvalidGroupError)
89
- end
3
+ RSpec.describe(Regexp::Scanner) do
4
+ RSpec.shared_examples 'scan error' do |error, issue, source|
5
+ it "raises #{error} for #{issue} `#{source}`" do
6
+ expect { RS.scan(source) }.to raise_error(error)
7
+ end
8
+ end
9
+
10
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[a'
11
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced set', '[[:alpha:]'
12
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced group', '(abc'
13
+ include_examples 'scan error', RS::PrematureEndError, 'unbalanced interval', 'a{1,2'
14
+ include_examples 'scan error', RS::PrematureEndError, 'eof in property', '\p{asci'
15
+ include_examples 'scan error', RS::PrematureEndError, 'incomplete property', '\p{ascii abc'
16
+ include_examples 'scan error', RS::PrematureEndError, 'eof options', '(?mix'
17
+ include_examples 'scan error', RS::PrematureEndError, 'eof escape', '\\'
18
+ include_examples 'scan error', RS::PrematureEndError, 'eof in hex escape', '\x'
19
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u'
20
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u0'
21
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u00'
22
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u000'
23
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{'
24
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{00'
25
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000'
26
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 '
27
+ include_examples 'scan error', RS::PrematureEndError, 'eof in cp escape', '\u{0000 0000'
28
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c'
29
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M'
30
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\c\M-'
31
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C'
32
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-'
33
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M'
34
+ include_examples 'scan error', RS::PrematureEndError, 'eof in c-seq', '\C-\M-'
35
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M'
36
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-'
37
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\\'
38
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\c'
39
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C'
40
+ include_examples 'scan error', RS::PrematureEndError, 'eof in m-seq', '\M-\C-'
41
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ'
42
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid hex', '\xZ0'
43
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\cü'
44
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\c\M-ü'
45
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-ü'
46
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid c-seq', '\C-\M-ü'
47
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-ü'
48
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\cü'
49
+ include_examples 'scan error', RS::InvalidSequenceError, 'invalid m-seq', '\M-\C-ü'
50
+ include_examples 'scan error', RS::ScannerError, 'invalid c-seq', '\Ca'
51
+ include_examples 'scan error', RS::ScannerError, 'invalid m-seq', '\Ma'
52
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?'')"
53
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', "(?''empty-name)"
54
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>)'
55
+ include_examples 'scan error', RS::InvalidGroupError, 'invalid group', '(?<>empty-name)'
56
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?foo)'
57
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix abc)'
58
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?mix^bc'
59
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid option', '(?)'
60
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-foo)'
61
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-u)'
62
+ include_examples 'scan error', RS::InvalidGroupOption, 'invalid neg option', '(?-mixu)'
63
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k<>'
64
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty backref', '\k\'\''
65
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g<>'
66
+ include_examples 'scan error', RS::InvalidBackrefError, 'empty refcall', '\g\'\''
67
+ include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
90
68
  end