regexp_parser 1.5.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -0
  3. data/Gemfile +3 -3
  4. data/README.md +14 -6
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +6 -43
  7. data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  9. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  10. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  11. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  12. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  13. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  14. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  15. data/lib/regexp_parser/expression/sequence.rb +3 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  17. data/lib/regexp_parser/lexer.rb +4 -25
  18. data/lib/regexp_parser/parser.rb +40 -33
  19. data/lib/regexp_parser/scanner.rb +1208 -1353
  20. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  21. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  22. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  23. data/lib/regexp_parser/scanner/scanner.rl +116 -202
  24. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  25. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  26. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  27. data/lib/regexp_parser/version.rb +1 -1
  28. data/spec/expression/base_spec.rb +14 -0
  29. data/spec/expression/methods/match_length_spec.rb +20 -0
  30. data/spec/expression/methods/match_spec.rb +25 -0
  31. data/spec/expression/methods/tests_spec.rb +2 -0
  32. data/spec/expression/methods/traverse_spec.rb +21 -0
  33. data/spec/expression/options_spec.rb +128 -0
  34. data/spec/expression/root_spec.rb +9 -0
  35. data/spec/expression/sequence_spec.rb +9 -0
  36. data/spec/lexer/conditionals_spec.rb +49 -119
  37. data/spec/lexer/delimiters_spec.rb +68 -0
  38. data/spec/lexer/escapes_spec.rb +8 -32
  39. data/spec/lexer/keep_spec.rb +5 -17
  40. data/spec/lexer/literals_spec.rb +73 -110
  41. data/spec/lexer/nesting_spec.rb +86 -117
  42. data/spec/lexer/refcalls_spec.rb +51 -50
  43. data/spec/parser/all_spec.rb +13 -1
  44. data/spec/parser/anchors_spec.rb +9 -23
  45. data/spec/parser/conditionals_spec.rb +9 -9
  46. data/spec/parser/errors_spec.rb +22 -43
  47. data/spec/parser/escapes_spec.rb +33 -44
  48. data/spec/parser/free_space_spec.rb +25 -4
  49. data/spec/parser/groups_spec.rb +98 -257
  50. data/spec/parser/keep_spec.rb +2 -15
  51. data/spec/parser/options_spec.rb +28 -0
  52. data/spec/parser/posix_classes_spec.rb +5 -24
  53. data/spec/parser/properties_spec.rb +42 -54
  54. data/spec/parser/quantifiers_spec.rb +42 -283
  55. data/spec/parser/refcalls_spec.rb +60 -185
  56. data/spec/parser/set/intersections_spec.rb +17 -17
  57. data/spec/parser/set/ranges_spec.rb +17 -17
  58. data/spec/parser/sets_spec.rb +5 -5
  59. data/spec/parser/types_spec.rb +11 -36
  60. data/spec/scanner/anchors_spec.rb +13 -28
  61. data/spec/scanner/conditionals_spec.rb +121 -173
  62. data/spec/scanner/delimiters_spec.rb +52 -0
  63. data/spec/scanner/errors_spec.rb +64 -87
  64. data/spec/scanner/escapes_spec.rb +53 -50
  65. data/spec/scanner/free_space_spec.rb +102 -165
  66. data/spec/scanner/groups_spec.rb +45 -64
  67. data/spec/scanner/keep_spec.rb +5 -28
  68. data/spec/scanner/literals_spec.rb +45 -81
  69. data/spec/scanner/meta_spec.rb +13 -33
  70. data/spec/scanner/options_spec.rb +36 -0
  71. data/spec/scanner/properties_spec.rb +43 -286
  72. data/spec/scanner/quantifiers_spec.rb +13 -28
  73. data/spec/scanner/refcalls_spec.rb +32 -48
  74. data/spec/scanner/sets_spec.rb +88 -102
  75. data/spec/scanner/types_spec.rb +10 -25
  76. data/spec/spec_helper.rb +1 -0
  77. data/spec/support/shared_examples.rb +77 -0
  78. data/spec/syntax/syntax_spec.rb +4 -0
  79. data/spec/syntax/versions/1.8.6_spec.rb +12 -33
  80. data/spec/syntax/versions/1.9.1_spec.rb +5 -18
  81. data/spec/syntax/versions/1.9.3_spec.rb +4 -17
  82. data/spec/syntax/versions/2.0.0_spec.rb +8 -23
  83. data/spec/syntax/versions/2.2.0_spec.rb +4 -17
  84. data/spec/syntax/versions/aliases_spec.rb +27 -109
  85. metadata +28 -10
  86. data/spec/scanner/scripts_spec.rb +0 -49
  87. data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  # edge cases with `...-&&...` and `...&&-...` are checked in test_ranges.rb
4
4
 
5
- RSpec.describe('SetIntersection parsing') do
5
+ RSpec.describe('CharacterSet::Intersection parsing') do
6
6
  specify('parse set intersection') do
7
7
  root = RP.parse('[a&&z]')
8
8
  set = root[0]
@@ -22,9 +22,9 @@ RSpec.describe('SetIntersection parsing') do
22
22
  expect(seq2.first.to_s).to eq 'z'
23
23
  expect(seq2.first).to be_instance_of(Literal)
24
24
 
25
- expect(set.matches?('a')).to be false
26
- expect(set.matches?('&')).to be false
27
- expect(set.matches?('z')).to be false
25
+ expect(set).not_to match 'a'
26
+ expect(set).not_to match '&'
27
+ expect(set).not_to match 'z'
28
28
  end
29
29
 
30
30
  specify('parse set intersection range and subset') do
@@ -46,9 +46,9 @@ RSpec.describe('SetIntersection parsing') do
46
46
  expect(seq2.first.to_s).to eq '[^a]'
47
47
  expect(seq2.first).to be_instance_of(CharacterSet)
48
48
 
49
- expect(set.matches?('a')).to be false
50
- expect(set.matches?('&')).to be false
51
- expect(set.matches?('b')).to be true
49
+ expect(set).not_to match 'a'
50
+ expect(set).not_to match '&'
51
+ expect(set).to match 'b'
52
52
  end
53
53
 
54
54
  specify('parse set intersection trailing range') do
@@ -70,9 +70,9 @@ RSpec.describe('SetIntersection parsing') do
70
70
  expect(seq2.first.to_s).to eq 'a-z'
71
71
  expect(seq2.first).to be_instance_of(CharacterSet::Range)
72
72
 
73
- expect(set.matches?('a')).to be true
74
- expect(set.matches?('&')).to be false
75
- expect(set.matches?('b')).to be false
73
+ expect(set).to match 'a'
74
+ expect(set).not_to match '&'
75
+ expect(set).not_to match 'b'
76
76
  end
77
77
 
78
78
  specify('parse set intersection type') do
@@ -94,9 +94,9 @@ RSpec.describe('SetIntersection parsing') do
94
94
  expect(seq2.first.to_s).to eq '\\w'
95
95
  expect(seq2.first).to be_instance_of(CharacterType::Word)
96
96
 
97
- expect(set.matches?('a')).to be true
98
- expect(set.matches?('&')).to be false
99
- expect(set.matches?('b')).to be false
97
+ expect(set).to match 'a'
98
+ expect(set).not_to match '&'
99
+ expect(set).not_to match 'b'
100
100
  end
101
101
 
102
102
  specify('parse set intersection multipart') do
@@ -119,9 +119,9 @@ RSpec.describe('SetIntersection parsing') do
119
119
  expect(seq3.count).to eq 3
120
120
  expect(seq3.to_s).to eq 'efg'
121
121
 
122
- expect(set.matches?('e')).to be true
123
- expect(set.matches?('f')).to be true
124
- expect(set.matches?('a')).to be false
125
- expect(set.matches?('g')).to be false
122
+ expect(set).to match 'e'
123
+ expect(set).to match 'f'
124
+ expect(set).not_to match 'a'
125
+ expect(set).not_to match 'g'
126
126
  end
127
127
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('SetRang parsing') do
3
+ RSpec.describe('CharacterSet::Range parsing') do
4
4
  specify('parse set range') do
5
5
  root = RP.parse('[a-z]')
6
6
  set = root[0]
@@ -13,7 +13,7 @@ RSpec.describe('SetRang parsing') do
13
13
  expect(range.first).to be_instance_of(Literal)
14
14
  expect(range.last.to_s).to eq 'z'
15
15
  expect(range.last).to be_instance_of(Literal)
16
- expect(set.matches?('m')).to be true
16
+ expect(set).to match 'm'
17
17
  end
18
18
 
19
19
  specify('parse set range hex') do
@@ -28,7 +28,7 @@ RSpec.describe('SetRang parsing') do
28
28
  expect(range.first).to be_instance_of(EscapeSequence::Hex)
29
29
  expect(range.last.to_s).to eq '\\x99'
30
30
  expect(range.last).to be_instance_of(EscapeSequence::Hex)
31
- expect(set.matches?('\\x50')).to be true
31
+ expect(set).to match '\\x50'
32
32
  end
33
33
 
34
34
  specify('parse set range unicode') do
@@ -43,7 +43,7 @@ RSpec.describe('SetRang parsing') do
43
43
  expect(range.first).to be_instance_of(EscapeSequence::CodepointList)
44
44
  expect(range.last.to_s).to eq '\\u1234'
45
45
  expect(range.last).to be_instance_of(EscapeSequence::Codepoint)
46
- expect(set.matches?('\\u600')).to be true
46
+ expect(set).to match '\\u600'
47
47
  end
48
48
 
49
49
  specify('parse set range edge case leading dash') do
@@ -53,7 +53,7 @@ RSpec.describe('SetRang parsing') do
53
53
 
54
54
  expect(set.count).to eq 1
55
55
  expect(range.count).to eq 2
56
- expect(set.matches?('a')).to be true
56
+ expect(set).to match 'a'
57
57
  end
58
58
 
59
59
  specify('parse set range edge case trailing dash') do
@@ -63,7 +63,7 @@ RSpec.describe('SetRang parsing') do
63
63
 
64
64
  expect(set.count).to eq 1
65
65
  expect(range.count).to eq 2
66
- expect(set.matches?('$')).to be true
66
+ expect(set).to match '$'
67
67
  end
68
68
 
69
69
  specify('parse set range edge case leading negate') do
@@ -71,8 +71,8 @@ RSpec.describe('SetRang parsing') do
71
71
  set = root[0]
72
72
 
73
73
  expect(set.count).to eq 2
74
- expect(set.matches?('a')).to be true
75
- expect(set.matches?('z')).to be false
74
+ expect(set).to match 'a'
75
+ expect(set).not_to match 'z'
76
76
  end
77
77
 
78
78
  specify('parse set range edge case trailing negate') do
@@ -82,7 +82,7 @@ RSpec.describe('SetRang parsing') do
82
82
 
83
83
  expect(set.count).to eq 1
84
84
  expect(range.count).to eq 2
85
- expect(set.matches?('$')).to be true
85
+ expect(set).to match '$'
86
86
  end
87
87
 
88
88
  specify('parse set range edge case leading intersection') do
@@ -91,10 +91,10 @@ RSpec.describe('SetRang parsing') do
91
91
 
92
92
  expect(set.count).to eq 1
93
93
  expect(set.first.last.to_s).to eq '-bc'
94
- expect(set.matches?('-')).to be true
95
- expect(set.matches?('b')).to be true
96
- expect(set.matches?('a')).to be false
97
- expect(set.matches?('c')).to be false
94
+ expect(set).to match '-'
95
+ expect(set).to match 'b'
96
+ expect(set).not_to match 'a'
97
+ expect(set).not_to match 'c'
98
98
  end
99
99
 
100
100
  specify('parse set range edge case trailing intersection') do
@@ -103,9 +103,9 @@ RSpec.describe('SetRang parsing') do
103
103
 
104
104
  expect(set.count).to eq 1
105
105
  expect(set.first.first.to_s).to eq 'bc-'
106
- expect(set.matches?('-')).to be true
107
- expect(set.matches?('b')).to be true
108
- expect(set.matches?('a')).to be false
109
- expect(set.matches?('c')).to be false
106
+ expect(set).to match '-'
107
+ expect(set).to match 'b'
108
+ expect(set).not_to match 'a'
109
+ expect(set).not_to match 'c'
110
110
  end
111
111
  end
@@ -1,6 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('Set parsing') do
3
+ RSpec.describe('CharacterSet parsing') do
4
4
  specify('parse set basic') do
5
5
  root = RP.parse('[ab]+')
6
6
  exp = root[0]
@@ -39,10 +39,10 @@ RSpec.describe('Set parsing') do
39
39
  expect(exp[1]).to be_instance_of(EscapeSequence::Backspace)
40
40
  expect(exp[1].text).to eq '\\b'
41
41
 
42
- expect(exp.matches?('a')).to be true
43
- expect(exp.matches?("\b")).to be true
44
- expect(exp.matches?('b')).to be false
45
- expect(exp.matches?('c')).to be true
42
+ expect(exp).to match 'a'
43
+ expect(exp).to match "\b"
44
+ expect(exp).not_to match 'b'
45
+ expect(exp).to match 'c'
46
46
  end
47
47
 
48
48
  specify('parse set escape sequence hex') do
@@ -1,43 +1,18 @@
1
1
  require 'spec_helper'
2
2
 
3
- RSpec.describe('Type parsing') do
4
- tests = {
5
- /a\dc/ => [1, :type, :digit, CharacterType::Digit],
6
- /a\Dc/ => [1, :type, :nondigit, CharacterType::NonDigit],
3
+ RSpec.describe('CharacterType parsing') do
4
+ include_examples 'parse', /a\dc/, 1 => [:type, :digit, CharacterType::Digit]
5
+ include_examples 'parse', /a\Dc/, 1 => [:type, :nondigit, CharacterType::NonDigit]
7
6
 
8
- /a\sc/ => [1, :type, :space, CharacterType::Space],
9
- /a\Sc/ => [1, :type, :nonspace, CharacterType::NonSpace],
7
+ include_examples 'parse', /a\sc/, 1 => [:type, :space, CharacterType::Space]
8
+ include_examples 'parse', /a\Sc/, 1 => [:type, :nonspace, CharacterType::NonSpace]
10
9
 
11
- /a\hc/ => [1, :type, :hex, CharacterType::Hex],
12
- /a\Hc/ => [1, :type, :nonhex, CharacterType::NonHex],
10
+ include_examples 'parse', /a\hc/, 1 => [:type, :hex, CharacterType::Hex]
11
+ include_examples 'parse', /a\Hc/, 1 => [:type, :nonhex, CharacterType::NonHex]
13
12
 
14
- /a\wc/ => [1, :type, :word, CharacterType::Word],
15
- /a\Wc/ => [1, :type, :nonword, CharacterType::NonWord],
16
- }
13
+ include_examples 'parse', /a\wc/, 1 => [:type, :word, CharacterType::Word]
14
+ include_examples 'parse', /a\Wc/, 1 => [:type, :nonword, CharacterType::NonWord]
17
15
 
18
- tests.each_with_index do |(pattern, (index, type, token, klass)), count|
19
- specify("parse_type_#{token}_#{count}") do
20
- root = RP.parse(pattern, 'ruby/1.9')
21
- exp = root.expressions.at(index)
22
-
23
- expect(exp).to be_a(klass)
24
-
25
- expect(exp.type).to eq type
26
- expect(exp.token).to eq token
27
- end
28
- end
29
-
30
- tests_2_0 = { 'a\\Rc' => [1, :type, :linebreak, CharacterType::Linebreak], 'a\\Xc' => [1, :type, :xgrapheme, CharacterType::ExtendedGrapheme] }
31
-
32
- tests_2_0.each_with_index do |(pattern, (index, type, token, klass)), count|
33
- specify("parse_type_#{token}_#{count}") do
34
- root = RP.parse(pattern, 'ruby/2.0')
35
- exp = root.expressions.at(index)
36
-
37
- expect(exp).to be_a(klass)
38
-
39
- expect(exp.type).to eq type
40
- expect(exp.token).to eq token
41
- end
42
- end
16
+ include_examples 'parse', 'a\\Rc', 1 => [:type, :linebreak, CharacterType::Linebreak]
17
+ include_examples 'parse', 'a\\Xc', 1 => [:type, :xgrapheme, CharacterType::ExtendedGrapheme]
43
18
  end
@@ -1,36 +1,21 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Anchor scanning') do
4
- tests = {
5
- '^abc' => [0, :anchor, :bol, '^', 0, 1],
6
- 'abc$' => [1, :anchor, :eol, '$', 3, 4],
4
+ include_examples 'scan', '^abc', 0 => [:anchor, :bol, '^', 0, 1]
5
+ include_examples 'scan', 'abc$', 1 => [:anchor, :eol, '$', 3, 4]
7
6
 
8
- '\Aabc' => [0, :anchor, :bos, '\A', 0, 2],
9
- 'abc\z' => [1, :anchor, :eos, '\z', 3, 5],
10
- 'abc\Z' => [1, :anchor, :eos_ob_eol, '\Z', 3, 5],
7
+ include_examples 'scan', '\Aabc', 0 => [:anchor, :bos, '\A', 0, 2]
8
+ include_examples 'scan', 'abc\z', 1 => [:anchor, :eos, '\z', 3, 5]
9
+ include_examples 'scan', 'abc\Z', 1 => [:anchor, :eos_ob_eol, '\Z', 3, 5]
11
10
 
12
- 'a\bc' => [1, :anchor, :word_boundary, '\b', 1, 3],
13
- 'a\Bc' => [1, :anchor, :nonword_boundary, '\B', 1, 3],
11
+ include_examples 'scan', 'a\bc', 1 => [:anchor, :word_boundary, '\b', 1, 3]
12
+ include_examples 'scan', 'a\Bc', 1 => [:anchor, :nonword_boundary, '\B', 1, 3]
14
13
 
15
- 'a\Gc' => [1, :anchor, :match_start, '\G', 1, 3],
14
+ include_examples 'scan', 'a\Gc', 1 => [:anchor, :match_start, '\G', 1, 3]
16
15
 
17
- "\\\\Ac" => [0, :escape, :backslash, '\\\\', 0, 2],
18
- "a\\\\z" => [1, :escape, :backslash, '\\\\', 1, 3],
19
- "a\\\\Z" => [1, :escape, :backslash, '\\\\', 1, 3],
20
- "a\\\\bc" => [1, :escape, :backslash, '\\\\', 1, 3],
21
- "a\\\\Bc" => [1, :escape, :backslash, '\\\\', 1, 3],
22
- }
23
-
24
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
25
- specify("scanner_#{type}_#{token}_#{count}") do
26
- tokens = RS.scan(pattern)
27
- result = tokens[index]
28
-
29
- expect(result[0]).to eq type
30
- expect(result[1]).to eq token
31
- expect(result[2]).to eq text
32
- expect(result[3]).to eq ts
33
- expect(result[4]).to eq te
34
- end
35
- end
16
+ include_examples 'scan', "\\\\Ac", 0 => [:escape, :backslash, '\\\\', 0, 2]
17
+ include_examples 'scan', "a\\\\z", 1 => [:escape, :backslash, '\\\\', 1, 3]
18
+ include_examples 'scan', "a\\\\Z", 1 => [:escape, :backslash, '\\\\', 1, 3]
19
+ include_examples 'scan', "a\\\\bc", 1 => [:escape, :backslash, '\\\\', 1, 3]
20
+ include_examples 'scan', "a\\\\Bc", 1 => [:escape, :backslash, '\\\\', 1, 3]
36
21
  end
@@ -1,180 +1,128 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Conditional scanning') do
4
- tests = {
5
- '(a)(?(1)T|F)1' => [3, :conditional, :open, '(?', 3, 5],
6
- '(a)(?(1)T|F)2' => [4, :conditional, :condition_open, '(', 5, 6],
7
- '(a)(?(1)T|F)3' => [5, :conditional, :condition, '1', 6, 7],
8
- '(a)(?(1)T|F)4' => [6, :conditional, :condition_close, ')', 7, 8],
9
- '(a)(?(1)T|F)5' => [7, :literal, :literal, 'T', 8, 9],
10
- '(a)(?(1)T|F)6' => [8, :conditional, :separator, '|', 9, 10],
11
- '(a)(?(1)T|F)7' => [9, :literal, :literal, 'F', 10, 11],
12
- '(a)(?(1)T|F)8' => [10, :conditional, :close, ')', 11, 12],
13
- '(a)(?(1)TRUE)9' => [8, :conditional, :close, ')', 12, 13],
14
- '(a)(?(1)TRUE|)10' => [8, :conditional, :separator, '|', 12, 13],
15
- '(a)(?(1)TRUE|)11' => [9, :conditional, :close, ')', 13, 14],
16
- '(?<N>A)(?(<N>)T|F)1' => [5, :conditional, :condition, '<N>', 10, 13],
17
- "(?'N'A)(?('N')T|F)2" => [5, :conditional, :condition, "'N'", 10, 13]
18
- }
4
+ include_examples 'scan', /(a)(?(1)T|F)1/, 3 => [:conditional, :open, '(?', 3, 5]
5
+ include_examples 'scan', /(a)(?(1)T|F)2/, 4 => [:conditional, :condition_open, '(', 5, 6]
6
+ include_examples 'scan', /(a)(?(1)T|F)3/, 5 => [:conditional, :condition, '1', 6, 7]
7
+ include_examples 'scan', /(a)(?(1)T|F)4/, 6 => [:conditional, :condition_close, ')', 7, 8]
8
+ include_examples 'scan', /(a)(?(1)T|F)5/, 7 => [:literal, :literal, 'T', 8, 9]
9
+ include_examples 'scan', /(a)(?(1)T|F)6/, 8 => [:conditional, :separator, '|', 9, 10]
10
+ include_examples 'scan', /(a)(?(1)T|F)7/, 9 => [:literal, :literal, 'F', 10, 11]
11
+ include_examples 'scan', /(a)(?(1)T|F)8/, 10 => [:conditional, :close, ')', 11, 12]
12
+ include_examples 'scan', /(a)(?(1)TRUE)9/, 8 => [:conditional, :close, ')', 12, 13]
13
+ include_examples 'scan', /(a)(?(1)TRUE|)10/, 8 => [:conditional, :separator, '|', 12, 13]
14
+ include_examples 'scan', /(a)(?(1)TRUE|)11/, 9 => [:conditional, :close, ')', 13, 14]
15
+ include_examples 'scan', /(?<N>A)(?(<N>)T|F)1/, 5 => [:conditional, :condition, '<N>', 10, 13]
16
+ include_examples 'scan', /(?'N'A)(?('N')T|F)2/, 5 => [:conditional, :condition, "'N'", 10, 13]
19
17
 
20
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te)), count|
21
- specify("scanner_#{type}_#{token}_#{count}") do
22
- tokens = RS.scan(pattern)
23
- result = tokens[index]
18
+ include_examples 'scan', /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/,
19
+ 0 => [:group, :capture, '(', 0, 1],
20
+ 1 => [:literal, :literal, 'a', 1, 2],
21
+ 2 => [:group, :capture, '(', 2, 3],
22
+ 3 => [:literal, :literal, 'b', 3, 4],
23
+ 4 => [:group, :capture, '(', 4, 5],
24
+ 5 => [:literal, :literal, 'c', 5, 6],
25
+ 6 => [:group, :close, ')', 6, 7],
26
+ 7 => [:group, :close, ')', 7, 8],
27
+ 8 => [:group, :close, ')', 8, 9],
28
+ 9 => [:conditional, :open, '(?', 9, 11],
29
+ 10 => [:conditional, :condition_open, '(', 11, 12],
30
+ 11 => [:conditional, :condition, '1', 12, 13],
31
+ 12 => [:conditional, :condition_close, ')', 13, 14],
32
+ 13 => [:conditional, :open, '(?', 14, 16],
33
+ 14 => [:conditional, :condition_open, '(', 16, 17],
34
+ 15 => [:conditional, :condition, '2', 17, 18],
35
+ 16 => [:conditional, :condition_close, ')', 18, 19],
36
+ 17 => [:literal, :literal, 'd', 19, 20],
37
+ 18 => [:conditional, :separator, '|', 20, 21],
38
+ 19 => [:conditional, :open, '(?', 21, 23],
39
+ 20 => [:conditional, :condition_open, '(', 23, 24],
40
+ 21 => [:conditional, :condition, '3', 24, 25],
41
+ 22 => [:conditional, :condition_close, ')', 25, 26],
42
+ 23 => [:literal, :literal, 'e', 26, 27],
43
+ 24 => [:conditional, :separator, '|', 27, 28],
44
+ 25 => [:literal, :literal, 'f', 28, 29],
45
+ 26 => [:conditional, :close, ')', 29, 30],
46
+ 27 => [:conditional, :close, ')', 30, 31],
47
+ 28 => [:conditional, :separator, '|', 31, 32],
48
+ 29 => [:conditional, :open, '(?', 32, 34],
49
+ 30 => [:conditional, :condition_open, '(', 34, 35],
50
+ 31 => [:conditional, :condition, '2', 35, 36],
51
+ 32 => [:conditional, :condition_close, ')', 36, 37],
52
+ 33 => [:conditional, :open, '(?', 37, 39],
53
+ 34 => [:conditional, :condition_open, '(', 39, 40],
54
+ 35 => [:conditional, :condition, '1', 40, 41],
55
+ 36 => [:conditional, :condition_close, ')', 41, 42],
56
+ 37 => [:literal, :literal, 'g', 42, 43],
57
+ 38 => [:conditional, :separator, '|', 43, 44],
58
+ 39 => [:literal, :literal, 'h', 44, 45],
59
+ 40 => [:conditional, :close, ')', 45, 46],
60
+ 41 => [:conditional, :close, ')', 46, 47],
61
+ 42 => [:conditional, :close, ')', 47, 48]
24
62
 
25
- expect(result[0]).to eq type
26
- expect(result[1]).to eq token
27
- expect(result[2]).to eq text
28
- expect(result[3]).to eq ts
29
- expect(result[4]).to eq te
30
- end
31
- end
63
+ include_examples 'scan', /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/,
64
+ 0 => [:group, :capture, '(', 0, 1],
65
+ 1 => [:group, :capture, '(', 1, 2],
66
+ 2 => [:literal, :literal, 'a', 2, 3],
67
+ 3 => [:group, :close, ')', 3, 4],
68
+ 4 => [:meta, :alternation, '|', 4, 5],
69
+ 5 => [:group, :capture, '(', 5, 6],
70
+ 6 => [:literal, :literal, 'b', 6, 7],
71
+ 7 => [:group, :close, ')', 7, 8],
72
+ 8 => [:meta, :alternation, '|', 8, 9],
73
+ 9 => [:group, :capture, '(', 9, 10],
74
+ 10 => [:conditional, :open, '(?', 10, 12],
75
+ 11 => [:conditional, :condition_open, '(', 12, 13],
76
+ 12 => [:conditional, :condition, '2', 13, 14],
77
+ 13 => [:conditional, :condition_close, ')', 14, 15],
78
+ 14 => [:group, :capture, '(', 15, 16],
79
+ 15 => [:literal, :literal, 'c', 16, 17],
80
+ 16 => [:group, :capture, '(', 17, 18],
81
+ 17 => [:literal, :literal, 'd', 18, 19],
82
+ 18 => [:meta, :alternation, '|', 19, 20],
83
+ 19 => [:literal, :literal, 'e', 20, 21],
84
+ 20 => [:group, :close, ')', 21, 22],
85
+ 21 => [:quantifier, :one_or_more, '+', 22, 23],
86
+ 22 => [:group, :close, ')', 23, 24],
87
+ 23 => [:quantifier, :zero_or_one, '?', 24, 25],
88
+ 24 => [:conditional, :separator, '|', 25, 26],
89
+ 25 => [:conditional, :open, '(?', 26, 28],
90
+ 26 => [:conditional, :condition_open, '(', 28, 29],
91
+ 27 => [:conditional, :condition, '3', 29, 30],
92
+ 28 => [:conditional, :condition_close, ')', 30, 31],
93
+ 29 => [:literal, :literal, 'f', 31, 32],
94
+ 30 => [:conditional, :separator, '|', 32, 33],
95
+ 31 => [:conditional, :open, '(?', 33, 35],
96
+ 32 => [:conditional, :condition_open, '(', 35, 36],
97
+ 33 => [:conditional, :condition, '4', 36, 37],
98
+ 34 => [:conditional, :condition_close, ')', 37, 38],
99
+ 35 => [:group, :capture, '(', 38, 39],
100
+ 36 => [:literal, :literal, 'g', 39, 40],
101
+ 37 => [:meta, :alternation, '|', 40, 41],
102
+ 38 => [:group, :capture, '(', 41, 42],
103
+ 39 => [:literal, :literal, 'h', 42, 43],
104
+ 40 => [:group, :close, ')', 43, 44],
105
+ 41 => [:group, :capture, '(', 44, 45],
106
+ 42 => [:literal, :literal, 'i', 45, 46],
107
+ 43 => [:group, :close, ')', 46, 47],
108
+ 44 => [:group, :close, ')', 47, 48],
109
+ 45 => [:conditional, :close, ')', 48, 49],
110
+ 46 => [:conditional, :close, ')', 49, 50],
111
+ 47 => [:conditional, :close, ')', 50, 51],
112
+ 48 => [:group, :close, ')', 51, 52],
113
+ 49 => [:group, :close, ')', 52, 53]
32
114
 
33
- specify('scan conditional nested') do
34
- regexp = '(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))'
35
- tokens = RS.scan(regexp)
36
-
37
- [
38
- [ 0, :group, :capture, '(', 0, 1],
39
- [ 1, :literal, :literal, 'a', 1, 2],
40
- [ 2, :group, :capture, '(', 2, 3],
41
- [ 3, :literal, :literal, 'b', 3, 4],
42
- [ 4, :group, :capture, '(', 4, 5],
43
- [ 5, :literal, :literal, 'c', 5, 6],
44
- [ 6, :group, :close, ')', 6, 7],
45
- [ 7, :group, :close, ')', 7, 8],
46
- [ 8, :group, :close, ')', 8, 9],
47
- [ 9, :conditional, :open, '(?', 9, 11],
48
- [10, :conditional, :condition_open, '(', 11, 12],
49
- [11, :conditional, :condition, '1', 12, 13],
50
- [12, :conditional, :condition_close, ')', 13, 14],
51
- [13, :conditional, :open, '(?', 14, 16],
52
- [14, :conditional, :condition_open, '(', 16, 17],
53
- [15, :conditional, :condition, '2', 17, 18],
54
- [16, :conditional, :condition_close, ')', 18, 19],
55
- [17, :literal, :literal, 'd', 19, 20],
56
- [18, :conditional, :separator, '|', 20, 21],
57
- [19, :conditional, :open, '(?', 21, 23],
58
- [20, :conditional, :condition_open, '(', 23, 24],
59
- [21, :conditional, :condition, '3', 24, 25],
60
- [22, :conditional, :condition_close, ')', 25, 26],
61
- [23, :literal, :literal, 'e', 26, 27],
62
- [24, :conditional, :separator, '|', 27, 28],
63
- [25, :literal, :literal, 'f', 28, 29],
64
- [26, :conditional, :close, ')', 29, 30],
65
- [27, :conditional, :close, ')', 30, 31],
66
- [28, :conditional, :separator, '|', 31, 32],
67
- [29, :conditional, :open, '(?', 32, 34],
68
- [30, :conditional, :condition_open, '(', 34, 35],
69
- [31, :conditional, :condition, '2', 35, 36],
70
- [32, :conditional, :condition_close, ')', 36, 37],
71
- [33, :conditional, :open, '(?', 37, 39],
72
- [34, :conditional, :condition_open, '(', 39, 40],
73
- [35, :conditional, :condition, '1', 40, 41],
74
- [36, :conditional, :condition_close, ')', 41, 42],
75
- [37, :literal, :literal, 'g', 42, 43],
76
- [38, :conditional, :separator, '|', 43, 44],
77
- [39, :literal, :literal, 'h', 44, 45],
78
- [40, :conditional, :close, ')', 45, 46],
79
- [41, :conditional, :close, ')', 46, 47],
80
- [42, :conditional, :close, ')', 47, 48]
81
- ].each do |index, type, token, text, ts, te|
82
- result = tokens[index]
83
-
84
- expect(result[0]).to eq type
85
- expect(result[1]).to eq token
86
- expect(result[2]).to eq text
87
- expect(result[3]).to eq ts
88
- expect(result[4]).to eq te
89
- end
90
- end
91
-
92
- specify('scan conditional nested groups') do
93
- regexp = '((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))'
94
- tokens = RS.scan(regexp)
95
-
96
- [
97
- [ 0, :group, :capture, '(', 0, 1],
98
- [ 1, :group, :capture, '(', 1, 2],
99
- [ 2, :literal, :literal, 'a', 2, 3],
100
- [ 3, :group, :close, ')', 3, 4],
101
- [ 4, :meta, :alternation, '|', 4, 5],
102
- [ 5, :group, :capture, '(', 5, 6],
103
- [ 6, :literal, :literal, 'b', 6, 7],
104
- [ 7, :group, :close, ')', 7, 8],
105
- [ 8, :meta, :alternation, '|', 8, 9],
106
- [ 9, :group, :capture, '(', 9, 10],
107
- [10, :conditional, :open, '(?', 10, 12],
108
- [11, :conditional, :condition_open, '(', 12, 13],
109
- [12, :conditional, :condition, '2', 13, 14],
110
- [13, :conditional, :condition_close, ')', 14, 15],
111
- [14, :group, :capture, '(', 15, 16],
112
- [15, :literal, :literal, 'c', 16, 17],
113
- [16, :group, :capture, '(', 17, 18],
114
- [17, :literal, :literal, 'd', 18, 19],
115
- [18, :meta, :alternation, '|', 19, 20],
116
- [19, :literal, :literal, 'e', 20, 21],
117
- [20, :group, :close, ')', 21, 22],
118
- [21, :quantifier, :one_or_more, '+', 22, 23],
119
- [22, :group, :close, ')', 23, 24],
120
- [23, :quantifier, :zero_or_one, '?', 24, 25],
121
- [24, :conditional, :separator, '|', 25, 26],
122
- [25, :conditional, :open, '(?', 26, 28],
123
- [26, :conditional, :condition_open, '(', 28, 29],
124
- [27, :conditional, :condition, '3', 29, 30],
125
- [28, :conditional, :condition_close, ')', 30, 31],
126
- [29, :literal, :literal, 'f', 31, 32],
127
- [30, :conditional, :separator, '|', 32, 33],
128
- [31, :conditional, :open, '(?', 33, 35],
129
- [32, :conditional, :condition_open, '(', 35, 36],
130
- [33, :conditional, :condition, '4', 36, 37],
131
- [34, :conditional, :condition_close, ')', 37, 38],
132
- [35, :group, :capture, '(', 38, 39],
133
- [36, :literal, :literal, 'g', 39, 40],
134
- [37, :meta, :alternation, '|', 40, 41],
135
- [38, :group, :capture, '(', 41, 42],
136
- [39, :literal, :literal, 'h', 42, 43],
137
- [40, :group, :close, ')', 43, 44],
138
- [41, :group, :capture, '(', 44, 45],
139
- [42, :literal, :literal, 'i', 45, 46],
140
- [43, :group, :close, ')', 46, 47],
141
- [44, :group, :close, ')', 47, 48],
142
- [45, :conditional, :close, ')', 48, 49],
143
- [46, :conditional, :close, ')', 49, 50],
144
- [47, :conditional, :close, ')', 50, 51],
145
- [48, :group, :close, ')', 51, 52],
146
- [49, :group, :close, ')', 52, 53]
147
- ].each do |index, type, token, text, ts, te|
148
- result = tokens[index]
149
-
150
- expect(result[0]).to eq type
151
- expect(result[1]).to eq token
152
- expect(result[2]).to eq text
153
- expect(result[3]).to eq ts
154
- expect(result[4]).to eq te
155
- end
156
- end
157
-
158
- specify('scan conditional nested alternation') do
159
- regexp = '(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p'
160
- tokens = RS.scan(regexp)
161
-
162
- [9, 11, 17, 19, 32, 34, 40, 42, 46, 48].each do |index|
163
- result = tokens[index]
164
-
165
- expect(result[0]).to eq :meta
166
- expect(result[1]).to eq :alternation
167
- expect(result[2]).to eq '|'
168
- expect((result[4] - result[3])).to eq 1
169
- end
170
-
171
- [14, 37].each do |index|
172
- result = tokens[index]
173
-
174
- expect(result[0]).to eq :conditional
175
- expect(result[1]).to eq :separator
176
- expect(result[2]).to eq '|'
177
- expect((result[4] - result[3])).to eq 1
178
- end
179
- end
115
+ include_examples 'scan', /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/,
116
+ 9 => [:meta, :alternation, '|', 10, 11],
117
+ 11 => [:meta, :alternation, '|', 12, 13],
118
+ 14 => [:conditional, :separator, '|', 15, 16],
119
+ 17 => [:meta, :alternation, '|', 18, 19],
120
+ 19 => [:meta, :alternation, '|', 20, 21],
121
+ 32 => [:meta, :alternation, '|', 34, 35],
122
+ 34 => [:meta, :alternation, '|', 36, 37],
123
+ 37 => [:conditional, :separator, '|', 39, 40],
124
+ 40 => [:meta, :alternation, '|', 42, 43],
125
+ 42 => [:meta, :alternation, '|', 44, 45],
126
+ 46 => [:meta, :alternation, '|', 48, 49],
127
+ 48 => [:meta, :alternation, '|', 50, 51]
180
128
  end