regexp_parser 1.5.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -0
  3. data/Gemfile +3 -3
  4. data/README.md +14 -6
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +6 -43
  7. data/lib/regexp_parser/expression/classes/conditional.rb +3 -2
  8. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  9. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  10. data/lib/regexp_parser/expression/methods/match_length.rb +1 -1
  11. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  12. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  13. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  14. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  15. data/lib/regexp_parser/expression/sequence.rb +3 -2
  16. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  17. data/lib/regexp_parser/lexer.rb +4 -25
  18. data/lib/regexp_parser/parser.rb +40 -33
  19. data/lib/regexp_parser/scanner.rb +1208 -1353
  20. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  21. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  22. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  23. data/lib/regexp_parser/scanner/scanner.rl +116 -202
  24. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  25. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  26. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  27. data/lib/regexp_parser/version.rb +1 -1
  28. data/spec/expression/base_spec.rb +14 -0
  29. data/spec/expression/methods/match_length_spec.rb +20 -0
  30. data/spec/expression/methods/match_spec.rb +25 -0
  31. data/spec/expression/methods/tests_spec.rb +2 -0
  32. data/spec/expression/methods/traverse_spec.rb +21 -0
  33. data/spec/expression/options_spec.rb +128 -0
  34. data/spec/expression/root_spec.rb +9 -0
  35. data/spec/expression/sequence_spec.rb +9 -0
  36. data/spec/lexer/conditionals_spec.rb +49 -119
  37. data/spec/lexer/delimiters_spec.rb +68 -0
  38. data/spec/lexer/escapes_spec.rb +8 -32
  39. data/spec/lexer/keep_spec.rb +5 -17
  40. data/spec/lexer/literals_spec.rb +73 -110
  41. data/spec/lexer/nesting_spec.rb +86 -117
  42. data/spec/lexer/refcalls_spec.rb +51 -50
  43. data/spec/parser/all_spec.rb +13 -1
  44. data/spec/parser/anchors_spec.rb +9 -23
  45. data/spec/parser/conditionals_spec.rb +9 -9
  46. data/spec/parser/errors_spec.rb +22 -43
  47. data/spec/parser/escapes_spec.rb +33 -44
  48. data/spec/parser/free_space_spec.rb +25 -4
  49. data/spec/parser/groups_spec.rb +98 -257
  50. data/spec/parser/keep_spec.rb +2 -15
  51. data/spec/parser/options_spec.rb +28 -0
  52. data/spec/parser/posix_classes_spec.rb +5 -24
  53. data/spec/parser/properties_spec.rb +42 -54
  54. data/spec/parser/quantifiers_spec.rb +42 -283
  55. data/spec/parser/refcalls_spec.rb +60 -185
  56. data/spec/parser/set/intersections_spec.rb +17 -17
  57. data/spec/parser/set/ranges_spec.rb +17 -17
  58. data/spec/parser/sets_spec.rb +5 -5
  59. data/spec/parser/types_spec.rb +11 -36
  60. data/spec/scanner/anchors_spec.rb +13 -28
  61. data/spec/scanner/conditionals_spec.rb +121 -173
  62. data/spec/scanner/delimiters_spec.rb +52 -0
  63. data/spec/scanner/errors_spec.rb +64 -87
  64. data/spec/scanner/escapes_spec.rb +53 -50
  65. data/spec/scanner/free_space_spec.rb +102 -165
  66. data/spec/scanner/groups_spec.rb +45 -64
  67. data/spec/scanner/keep_spec.rb +5 -28
  68. data/spec/scanner/literals_spec.rb +45 -81
  69. data/spec/scanner/meta_spec.rb +13 -33
  70. data/spec/scanner/options_spec.rb +36 -0
  71. data/spec/scanner/properties_spec.rb +43 -286
  72. data/spec/scanner/quantifiers_spec.rb +13 -28
  73. data/spec/scanner/refcalls_spec.rb +32 -48
  74. data/spec/scanner/sets_spec.rb +88 -102
  75. data/spec/scanner/types_spec.rb +10 -25
  76. data/spec/spec_helper.rb +1 -0
  77. data/spec/support/shared_examples.rb +77 -0
  78. data/spec/syntax/syntax_spec.rb +4 -0
  79. data/spec/syntax/versions/1.8.6_spec.rb +12 -33
  80. data/spec/syntax/versions/1.9.1_spec.rb +5 -18
  81. data/spec/syntax/versions/1.9.3_spec.rb +4 -17
  82. data/spec/syntax/versions/2.0.0_spec.rb +8 -23
  83. data/spec/syntax/versions/2.2.0_spec.rb +4 -17
  84. data/spec/syntax/versions/aliases_spec.rb +27 -109
  85. metadata +28 -10
  86. data/spec/scanner/scripts_spec.rb +0 -49
  87. data/spec/scanner/unicode_blocks_spec.rb +0 -28
@@ -53,6 +53,10 @@ module Regexp::Syntax
53
53
 
54
54
  Age_V2_6_0 = [:'age=11.0']
55
55
 
56
+ Age_V2_6_2 = [:'age=12.0']
57
+
58
+ Age_V2_6_3 = [:'age=12.1']
59
+
56
60
  Age = all[:Age_V]
57
61
 
58
62
  Derived_V1_9_0 = [
@@ -297,6 +301,18 @@ module Regexp::Syntax
297
301
  :sogdian,
298
302
  ]
299
303
 
304
+ Script_V2_6_2 = [
305
+ :egyptian_hieroglyph_format_controls,
306
+ :elymaic,
307
+ :nandinagari,
308
+ :nyiakeng_puachue_hmong,
309
+ :ottoman_siyaq_numbers,
310
+ :small_kana_extension,
311
+ :symbols_and_pictographs_extended_a,
312
+ :tamil_supplement,
313
+ :wancho,
314
+ ]
315
+
300
316
  Script = all[:Script_V]
301
317
 
302
318
  UnicodeBlock_V1_9_0 = [
@@ -612,6 +628,18 @@ module Regexp::Syntax
612
628
  :in_sogdian,
613
629
  ]
614
630
 
631
+ UnicodeBlock_V2_6_2 = [
632
+ :in_egyptian_hieroglyph_format_controls,
633
+ :in_elymaic,
634
+ :in_nandinagari,
635
+ :in_nyiakeng_puachue_hmong,
636
+ :in_ottoman_siyaq_numbers,
637
+ :in_small_kana_extension,
638
+ :in_symbols_and_pictographs_extended_a,
639
+ :in_tamil_supplement,
640
+ :in_wancho,
641
+ ]
642
+
615
643
  UnicodeBlock = all[:UnicodeBlock_V]
616
644
 
617
645
  Emoji_V2_5_0 = [
@@ -632,6 +660,8 @@ module Regexp::Syntax
632
660
  V2_4_0 = all[:V2_4_0]
633
661
  V2_5_0 = all[:V2_5_0]
634
662
  V2_6_0 = all[:V2_6_0]
663
+ V2_6_2 = all[:V2_6_2]
664
+ V2_6_3 = all[:V2_6_3]
635
665
 
636
666
  All = all[/^V\d+_\d+_\d+$/]
637
667
 
@@ -0,0 +1,10 @@
1
+ module Regexp::Syntax
2
+ class V2_6_2 < Regexp::Syntax::V2_6_0
3
+ def initialize
4
+ super
5
+
6
+ implements :property, UnicodeProperty::V2_6_2
7
+ implements :nonproperty, UnicodeProperty::V2_6_2
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,10 @@
1
+ module Regexp::Syntax
2
+ class V2_6_3 < Regexp::Syntax::V2_6_2
3
+ def initialize
4
+ super
5
+
6
+ implements :property, UnicodeProperty::V2_6_3
7
+ implements :nonproperty, UnicodeProperty::V2_6_3
8
+ end
9
+ end
10
+ end
@@ -1,5 +1,5 @@
1
1
  class Regexp
2
2
  class Parser
3
- VERSION = '1.5.0'
3
+ VERSION = '1.8.0'
4
4
  end
5
5
  end
@@ -77,4 +77,18 @@ RSpec.describe(Regexp::Expression::Base) do
77
77
  expect([root[2][1].coded_offset, root[2][1].to_s]).to eq ['@6+4', '(c?)']
78
78
  expect([root[2][1][0].coded_offset, root[2][1][0].to_s]).to eq ['@7+2', 'c?']
79
79
  end
80
+
81
+ specify('#quantity') do
82
+ expect(RP.parse(/aa/)[0].quantity).to eq [nil, nil]
83
+ expect(RP.parse(/a?/)[0].quantity).to eq [0, 1]
84
+ expect(RP.parse(/a*/)[0].quantity).to eq [0, -1]
85
+ expect(RP.parse(/a+/)[0].quantity).to eq [1, -1]
86
+ end
87
+
88
+ specify('#repetitions') do
89
+ expect(RP.parse(/aa/)[0].repetitions).to eq 1..1
90
+ expect(RP.parse(/a?/)[0].repetitions).to eq 0..1
91
+ expect(RP.parse(/a*/)[0].repetitions).to eq 0..(Float::INFINITY)
92
+ expect(RP.parse(/a+/)[0].repetitions).to eq 1..(Float::INFINITY)
93
+ end
80
94
  end
@@ -30,6 +30,12 @@ RSpec.describe(Regexp::MatchLength) do
30
30
  specify('absence group') { expect(ML.of('(?~abc)').minmax).to eq [0, Float::INFINITY] }
31
31
  end
32
32
 
33
+ specify('raises for missing references') do
34
+ exp = RP.parse(/(a)\1/).last
35
+ exp.referenced_expression = nil
36
+ expect { exp.match_length }.to raise_error(ArgumentError)
37
+ end
38
+
33
39
  describe('::of') do
34
40
  it('works with Regexps') { expect(ML.of(/foo/).minmax).to eq [3, 3] }
35
41
  it('works with Strings') { expect(ML.of('foo').minmax).to eq [3, 3] }
@@ -114,6 +120,13 @@ RSpec.describe(Regexp::MatchLength) do
114
120
  expect { result.next }.to raise_error(StopIteration)
115
121
  end
116
122
 
123
+ it 'is aware of limit option even if called without a block' do
124
+ result = ML.of(/a?/).each(limit: 1)
125
+ expect(result).to be_a(Enumerator)
126
+ expect(result.next).to eq 0
127
+ expect { result.next }.to raise_error(StopIteration)
128
+ end
129
+
117
130
  it 'is limited to 1000 iterations in case there are infinite match lengths' do
118
131
  expect(ML.of(/a*/).first(3000).size).to eq 1000
119
132
  end
@@ -138,4 +151,11 @@ RSpec.describe(Regexp::MatchLength) do
138
151
  expect(ML.of(/a*/).endless_each.first(3000).size).to eq 3000
139
152
  end
140
153
  end
154
+
155
+ describe('#inspect') do
156
+ it 'is nice' do
157
+ result = RP.parse(/a{2,4}/)[0].match_length
158
+ expect(result.inspect).to eq '#<Regexp::MatchLength<Literal> min=2 max=4>'
159
+ end
160
+ end
141
161
  end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Expression#match') do
4
+ it 'returns the #match result of the respective Regexp' do
5
+ expect(RP.parse(/a/).match('a')[0]).to eq 'a'
6
+ end
7
+
8
+ it 'can be given an offset, just like Regexp#match' do
9
+ expect(RP.parse(/./).match('ab', 1)[0]).to eq 'b'
10
+ end
11
+
12
+ it 'works with the #=~ alias' do
13
+ expect(RP.parse(/a/) =~ 'a').to be_a MatchData
14
+ end
15
+ end
16
+
17
+ RSpec.describe('Expression#match?') do
18
+ it 'returns true if the Respective Regexp matches' do
19
+ expect(RP.parse(/a/).match?('a')).to be true
20
+ end
21
+
22
+ it 'returns false if the Respective Regexp does not match' do
23
+ expect(RP.parse(/a/).match?('b')).to be false
24
+ end
25
+ end
@@ -93,5 +93,7 @@ RSpec.describe('ExpressionTests') do
93
93
  expect(seq_2.last.one_of?(meta: [:*], anchor: :eos)).to be true
94
94
  expect(seq_2.last.one_of?(meta: [:*], anchor: [:bos])).to be false
95
95
  expect(seq_2.last.one_of?(meta: [:*], anchor: %i[bos eos])).to be true
96
+
97
+ expect { root.one_of?(Object.new) }.to raise_error(ArgumentError)
96
98
  end
97
99
  end
@@ -39,6 +39,17 @@ RSpec.describe('Subexpression#traverse') do
39
39
  expect(visits).to eq 9
40
40
  end
41
41
 
42
+ specify('Subexpression#traverse without a block') do
43
+ root = RP.parse(/abc/)
44
+ enum = root.traverse
45
+
46
+ expect(enum).to be_a(Enumerator)
47
+ event, expr, idx = enum.next
48
+ expect(event).to eq(:visit)
49
+ expect(expr).to be_a(Regexp::Expression::Literal)
50
+ expect(idx).to eq(0)
51
+ end
52
+
42
53
  specify('Subexpression#walk alias') do
43
54
  root = RP.parse(/abc/)
44
55
 
@@ -81,6 +92,16 @@ RSpec.describe('Subexpression#traverse') do
81
92
  expect(indices).to eq [0, 0, 1, 0, 2]
82
93
  end
83
94
 
95
+ specify('Subexpression#each_expression without a block') do
96
+ root = RP.parse(/abc/)
97
+ enum = root.each_expression
98
+
99
+ expect(enum).to be_a(Enumerator)
100
+ expr, idx = enum.next
101
+ expect(expr).to be_a(Regexp::Expression::Literal)
102
+ expect(idx).to eq(0)
103
+ end
104
+
84
105
  specify('Subexpression#flat_map without block') do
85
106
  root = RP.parse(/a(b([c-e]+))?/)
86
107
 
@@ -0,0 +1,128 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Expression#options') do
4
+ it 'returns a hash of options/flags that affect the expression' do
5
+ exp = RP.parse(/a/ix)[0]
6
+ expect(exp).to be_a Literal
7
+ expect(exp.options).to eq(i: true, x: true)
8
+ end
9
+
10
+ it 'includes options that are locally enabled via special groups' do
11
+ exp = RP.parse(/(?x)(?m:a)/i)[1][0]
12
+ expect(exp).to be_a Literal
13
+ expect(exp.options).to eq(i: true, m: true, x: true)
14
+ end
15
+
16
+ it 'excludes locally disabled options' do
17
+ exp = RP.parse(/(?x)(?-im:a)/i)[1][0]
18
+ expect(exp).to be_a Literal
19
+ expect(exp.options).to eq(x: true)
20
+ end
21
+
22
+ it 'gives correct precedence to negative options' do
23
+ # Negative options have precedence. E.g. /(?i-i)a/ is case-sensitive.
24
+ regexp = /(?i-i:a)/
25
+ expect(regexp).to match 'a'
26
+ expect(regexp).not_to match 'A'
27
+
28
+ exp = RP.parse(regexp)[0][0]
29
+ expect(exp).to be_a Literal
30
+ expect(exp.options).to eq({})
31
+ end
32
+
33
+ it 'correctly handles multiple negative option parts' do
34
+ regexp = /(?--m--mx--) . /mx
35
+ expect(regexp).to match ' . '
36
+ expect(regexp).not_to match '.'
37
+ expect(regexp).not_to match "\n"
38
+
39
+ exp = RP.parse(regexp)[2]
40
+ expect(exp.options).to eq({})
41
+ end
42
+
43
+ it 'gives correct precedence when encountering multiple encoding flags' do
44
+ # Any encoding flag overrides all previous encoding flags. If there are
45
+ # multiple encoding flags in an options string, the last one wins.
46
+ # E.g. /(?dau)\w/ matches UTF8 chars but /(?dua)\w/ only ASCII chars.
47
+ regexp1 = /(?dau)\w/
48
+ regexp2 = /(?dua)\w/
49
+ expect(regexp1).to match 'ü'
50
+ expect(regexp2).not_to match 'ü'
51
+
52
+ exp1 = RP.parse(regexp1)[1]
53
+ exp2 = RP.parse(regexp2)[1]
54
+ expect(exp1.options).to eq(u: true)
55
+ expect(exp2.options).to eq(a: true)
56
+ end
57
+
58
+ it 'is accessible via shortcuts' do
59
+ exp = Root.build
60
+
61
+ expect { exp.options[:i] = true }
62
+ .to change { exp.i? }.from(false).to(true)
63
+ .and change { exp.ignore_case? }.from(false).to(true)
64
+ .and change { exp.case_insensitive? }.from(false).to(true)
65
+
66
+ expect { exp.options[:m] = true }
67
+ .to change { exp.m? }.from(false).to(true)
68
+ .and change { exp.multiline? }.from(false).to(true)
69
+
70
+ expect { exp.options[:x] = true }
71
+ .to change { exp.x? }.from(false).to(true)
72
+ .and change { exp.extended? }.from(false).to(true)
73
+ .and change { exp.free_spacing? }.from(false).to(true)
74
+
75
+ expect { exp.options[:a] = true }
76
+ .to change { exp.a? }.from(false).to(true)
77
+ .and change { exp.ascii_classes? }.from(false).to(true)
78
+
79
+ expect { exp.options[:d] = true }
80
+ .to change { exp.d? }.from(false).to(true)
81
+ .and change { exp.default_classes? }.from(false).to(true)
82
+
83
+ expect { exp.options[:u] = true }
84
+ .to change { exp.u? }.from(false).to(true)
85
+ .and change { exp.unicode_classes? }.from(false).to(true)
86
+ end
87
+
88
+ RSpec.shared_examples '#options' do |regexp, path, klass|
89
+ it "works for expression class #{klass}" do
90
+ exp = RP.parse(/#{regexp.source}/i).dig(*path)
91
+ expect(exp).to be_a(klass)
92
+ expect(exp).to be_i
93
+ expect(exp).not_to be_x
94
+ end
95
+ end
96
+
97
+ include_examples '#options', //, [], Root
98
+ include_examples '#options', /a/, [0], Literal
99
+ include_examples '#options', /\A/, [0], Anchor::Base
100
+ include_examples '#options', /\d/, [0], CharacterType::Base
101
+ include_examples '#options', /\n/, [0], EscapeSequence::Base
102
+ include_examples '#options', /\K/, [0], Keep::Mark
103
+ include_examples '#options', /./, [0], CharacterType::Any
104
+ include_examples '#options', /(a)/, [0], Group::Base
105
+ include_examples '#options', /(a)/, [0, 0], Literal
106
+ include_examples '#options', /(?=a)/, [0], Assertion::Base
107
+ include_examples '#options', /(?=a)/, [0, 0], Literal
108
+ include_examples '#options', /(a|b)/, [0], Group::Base
109
+ include_examples '#options', /(a|b)/, [0, 0], Alternation
110
+ include_examples '#options', /(a|b)/, [0, 0, 0], Alternative
111
+ include_examples '#options', /(a|b)/, [0, 0, 0, 0], Literal
112
+ include_examples '#options', /(a)\1/, [1], Backreference::Base
113
+ include_examples '#options', /(a)\k<1>/, [1], Backreference::Number
114
+ include_examples '#options', /(a)\g<1>/, [1], Backreference::NumberCall
115
+ include_examples '#options', /[a]/, [0], CharacterSet
116
+ include_examples '#options', /[a]/, [0, 0], Literal
117
+ include_examples '#options', /[a-z]/, [0, 0], CharacterSet::Range
118
+ include_examples '#options', /[a-z]/, [0, 0, 0], Literal
119
+ include_examples '#options', /[a&&z]/, [0, 0], CharacterSet::Intersection
120
+ include_examples '#options', /[a&&z]/, [0, 0, 0], CharacterSet::IntersectedSequence
121
+ include_examples '#options', /[a&&z]/, [0, 0, 0, 0], Literal
122
+ include_examples '#options', /[[:ascii:]]/, [0, 0], PosixClass
123
+ include_examples '#options', /\p{word}/, [0], UnicodeProperty::Base
124
+ include_examples '#options', /(a)(?(1)b|c)/, [1], Conditional::Expression
125
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 0], Conditional::Condition
126
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 1], Conditional::Branch
127
+ include_examples '#options', /(a)(?(1)b|c)/, [1, 1, 0], Literal
128
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Expression::Root) do
4
+ describe('#initialize') do
5
+ it 'supports the old, nonstandard arity for backwards compatibility' do
6
+ expect { Root.new }.to output.to_stderr
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe(Regexp::Expression::Sequence) do
4
+ describe('#initialize') do
5
+ it 'supports the old, nonstandard arity for backwards compatibility' do
6
+ expect { Sequence.new(0, 0, 0) }.to output.to_stderr
7
+ end
8
+ end
9
+ end
@@ -1,123 +1,53 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe('Conditional lexing') do
4
- # Basic lexer output and nesting tests
5
- tests = {
6
- '(?<A>a)(?(<A>)b|c)' => [3, :conditional, :open, '(?', 7, 9, 0, 0, 0],
7
- '(?<B>a)(?(<B>)b|c)' => [4, :conditional, :condition, '(<B>)', 9, 14, 0, 0, 1],
8
- '(?<C>a)(?(<C>)b|c)' => [6, :conditional, :separator, '|', 15, 16, 0, 0, 1],
9
- '(?<D>a)(?(<D>)b|c)' => [8, :conditional, :close, ')', 17, 18, 0, 0, 0],
10
- }
11
-
12
- tests.each_with_index do |(pattern, (index, type, token, text, ts, te, level, set_level, conditional_level)), count|
13
- specify("lexer_#{type}_#{token}_#{count}") do
14
- tokens = RL.lex(pattern)
15
- struct = tokens.at(index)
16
-
17
- expect(struct.type).to eq type
18
- expect(struct.token).to eq token
19
- expect(struct.text).to eq text
20
- expect(struct.ts).to eq ts
21
- expect(struct.te).to eq te
22
- expect(struct.level).to eq level
23
- expect(struct.set_level).to eq set_level
24
- expect(struct.conditional_level).to eq conditional_level
25
- end
26
- end
27
-
28
- specify('lexer conditional mixed nesting') do
29
- regexp = '((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))'
30
- tokens = RL.lex(regexp)
31
-
32
- [
33
- [ 0, :group, :capture, '(', 0, 1, 0, 0, 0],
34
- [ 1, :group, :named, '(?<A>', 1, 6, 1, 0, 0],
35
-
36
- [ 5, :conditional, :open, '(?', 13, 15, 2, 0, 0],
37
- [ 6, :conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
38
- [ 8, :conditional, :separator, '|', 21, 22, 2, 0, 1],
39
-
40
- [10, :conditional, :open, '(?', 23, 25, 3, 0, 1],
41
- [11, :conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
42
-
43
- [12, :set, :open, '[', 30, 31, 3, 0, 2],
44
- [13, :literal, :literal, 'e', 31, 32, 3, 1, 2],
45
- [14, :set, :range, '-', 32, 33, 3, 1, 2],
46
- [15, :literal, :literal, 'g', 33, 34, 3, 1, 2],
47
- [16, :set, :close, ']', 34, 35, 3, 0, 2],
48
-
49
- [17, :conditional, :separator, '|', 35, 36, 3, 0, 2],
50
- [23, :conditional, :close, ')', 41, 42, 3, 0, 1],
51
- [25, :conditional, :close, ')', 43, 44, 2, 0, 0],
52
-
53
- [26, :group, :close, ')', 44, 45, 1, 0, 0],
54
- [27, :group, :close, ')', 45, 46, 0, 0, 0]
55
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
56
- struct = tokens.at(index)
57
-
58
- expect(struct.type).to eq type
59
- expect(struct.token).to eq token
60
- expect(struct.text).to eq text
61
- expect(struct.ts).to eq ts
62
- expect(struct.te).to eq te
63
- expect(struct.level).to eq level
64
- expect(struct.set_level).to eq set_level
65
- expect(struct.conditional_level).to eq conditional_level
66
- end
67
- end
68
-
69
- specify('lexer conditional deep nesting') do
70
- regexp = '(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))'
71
- tokens = RL.lex(regexp)
72
-
73
- [
74
- [ 9, :conditional, :open, '(?', 9, 11, 0, 0, 0],
75
- [10, :conditional, :condition, '(1)', 11, 14, 0, 0, 1],
76
-
77
- [11, :conditional, :open, '(?', 14, 16, 0, 0, 1],
78
- [12, :conditional, :condition, '(2)', 16, 19, 0, 0, 2],
79
-
80
- [13, :conditional, :open, '(?', 19, 21, 0, 0, 2],
81
- [14, :conditional, :condition, '(3)', 21, 24, 0, 0, 3],
82
-
83
- [16, :conditional, :separator, '|', 25, 26, 0, 0, 3],
84
-
85
- [18, :conditional, :close, ')', 27, 28, 0, 0, 2],
86
- [19, :conditional, :close, ')', 28, 29, 0, 0, 1],
87
-
88
- [20, :conditional, :separator, '|', 29, 30, 0, 0, 1],
89
-
90
- [21, :conditional, :open, '(?', 30, 32, 0, 0, 1],
91
- [22, :conditional, :condition, '(3)', 32, 35, 0, 0, 2],
92
-
93
- [23, :conditional, :open, '(?', 35, 37, 0, 0, 2],
94
- [24, :conditional, :condition, '(2)', 37, 40, 0, 0, 3],
95
-
96
- [26, :conditional, :separator, '|', 41, 42, 0, 0, 3],
97
-
98
- [28, :conditional, :close, ')', 43, 44, 0, 0, 2],
99
-
100
- [29, :conditional, :separator, '|', 44, 45, 0, 0, 2],
101
-
102
- [30, :conditional, :open, '(?', 45, 47, 0, 0, 2],
103
- [31, :conditional, :condition, '(1)', 47, 50, 0, 0, 3],
104
-
105
- [33, :conditional, :separator, '|', 51, 52, 0, 0, 3],
106
-
107
- [35, :conditional, :close, ')', 53, 54, 0, 0, 2],
108
- [36, :conditional, :close, ')', 54, 55, 0, 0, 1],
109
- [37, :conditional, :close, ')', 55, 56, 0, 0, 0]
110
- ].each do |index, type, token, text, ts, te, level, set_level, conditional_level|
111
- struct = tokens.at(index)
112
-
113
- expect(struct.type).to eq type
114
- expect(struct.token).to eq token
115
- expect(struct.text).to eq text
116
- expect(struct.ts).to eq ts
117
- expect(struct.te).to eq te
118
- expect(struct.level).to eq level
119
- expect(struct.set_level).to eq set_level
120
- expect(struct.conditional_level).to eq conditional_level
121
- end
122
- end
4
+ include_examples 'lex', /(?<A>a)(?(<A>)b|c)/,
5
+ 3 => [:conditional, :open, '(?', 7, 9, 0, 0, 0],
6
+ 4 => [:conditional, :condition, '(<A>)', 9, 14, 0, 0, 1],
7
+ 6 => [:conditional, :separator, '|', 15, 16, 0, 0, 1],
8
+ 8 => [:conditional, :close, ')', 17, 18, 0, 0, 0]
9
+
10
+ include_examples 'lex', /((?<A>a)(?<B>(?(<A>)b|((?(<B>)[e-g]|[h-j])))))/,
11
+ 0 => [:group, :capture, '(', 0, 1, 0, 0, 0],
12
+ 1 => [:group, :named, '(?<A>', 1, 6, 1, 0, 0],
13
+ 5 => [:conditional, :open, '(?', 13, 15, 2, 0, 0],
14
+ 6 => [:conditional, :condition, '(<A>)', 15, 20, 2, 0, 1],
15
+ 8 => [:conditional, :separator, '|', 21, 22, 2, 0, 1],
16
+ 10 => [:conditional, :open, '(?', 23, 25, 3, 0, 1],
17
+ 11 => [:conditional, :condition, '(<B>)', 25, 30, 3, 0, 2],
18
+ 12 => [:set, :open, '[', 30, 31, 3, 0, 2],
19
+ 13 => [:literal, :literal, 'e', 31, 32, 3, 1, 2],
20
+ 14 => [:set, :range, '-', 32, 33, 3, 1, 2],
21
+ 15 => [:literal, :literal, 'g', 33, 34, 3, 1, 2],
22
+ 16 => [:set, :close, ']', 34, 35, 3, 0, 2],
23
+ 17 => [:conditional, :separator, '|', 35, 36, 3, 0, 2],
24
+ 23 => [:conditional, :close, ')', 41, 42, 3, 0, 1],
25
+ 25 => [:conditional, :close, ')', 43, 44, 2, 0, 0],
26
+ 26 => [:group, :close, ')', 44, 45, 1, 0, 0],
27
+ 27 => [:group, :close, ')', 45, 46, 0, 0, 0]
28
+
29
+ include_examples 'lex', /(a(b(c)))(?(1)(?(2)(?(3)d|e))|(?(3)(?(2)f|g)|(?(1)f|g)))/,
30
+ 9 => [:conditional, :open, '(?', 9, 11, 0, 0, 0],
31
+ 10 => [:conditional, :condition, '(1)', 11, 14, 0, 0, 1],
32
+ 11 => [:conditional, :open, '(?', 14, 16, 0, 0, 1],
33
+ 12 => [:conditional, :condition, '(2)', 16, 19, 0, 0, 2],
34
+ 13 => [:conditional, :open, '(?', 19, 21, 0, 0, 2],
35
+ 14 => [:conditional, :condition, '(3)', 21, 24, 0, 0, 3],
36
+ 16 => [:conditional, :separator, '|', 25, 26, 0, 0, 3],
37
+ 18 => [:conditional, :close, ')', 27, 28, 0, 0, 2],
38
+ 19 => [:conditional, :close, ')', 28, 29, 0, 0, 1],
39
+ 20 => [:conditional, :separator, '|', 29, 30, 0, 0, 1],
40
+ 21 => [:conditional, :open, '(?', 30, 32, 0, 0, 1],
41
+ 22 => [:conditional, :condition, '(3)', 32, 35, 0, 0, 2],
42
+ 23 => [:conditional, :open, '(?', 35, 37, 0, 0, 2],
43
+ 24 => [:conditional, :condition, '(2)', 37, 40, 0, 0, 3],
44
+ 26 => [:conditional, :separator, '|', 41, 42, 0, 0, 3],
45
+ 28 => [:conditional, :close, ')', 43, 44, 0, 0, 2],
46
+ 29 => [:conditional, :separator, '|', 44, 45, 0, 0, 2],
47
+ 30 => [:conditional, :open, '(?', 45, 47, 0, 0, 2],
48
+ 31 => [:conditional, :condition, '(1)', 47, 50, 0, 0, 3],
49
+ 33 => [:conditional, :separator, '|', 51, 52, 0, 0, 3],
50
+ 35 => [:conditional, :close, ')', 53, 54, 0, 0, 2],
51
+ 36 => [:conditional, :close, ')', 54, 55, 0, 0, 1],
52
+ 37 => [:conditional, :close, ')', 55, 56, 0, 0, 0]
123
53
  end