regexp_parser 1.4.0 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (171)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +66 -1
  3. data/Gemfile +3 -3
  4. data/README.md +11 -18
  5. data/Rakefile +3 -4
  6. data/lib/regexp_parser/expression.rb +28 -53
  7. data/lib/regexp_parser/expression/classes/backref.rb +18 -10
  8. data/lib/regexp_parser/expression/classes/conditional.rb +7 -2
  9. data/lib/regexp_parser/expression/classes/escape.rb +0 -4
  10. data/lib/regexp_parser/expression/classes/group.rb +4 -2
  11. data/lib/regexp_parser/expression/classes/keep.rb +1 -3
  12. data/lib/regexp_parser/expression/methods/match.rb +13 -0
  13. data/lib/regexp_parser/expression/methods/match_length.rb +172 -0
  14. data/lib/regexp_parser/expression/methods/options.rb +35 -0
  15. data/lib/regexp_parser/expression/methods/strfregexp.rb +0 -1
  16. data/lib/regexp_parser/expression/methods/tests.rb +6 -15
  17. data/lib/regexp_parser/expression/methods/traverse.rb +3 -1
  18. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  19. data/lib/regexp_parser/expression/sequence.rb +3 -6
  20. data/lib/regexp_parser/expression/sequence_operation.rb +2 -6
  21. data/lib/regexp_parser/expression/subexpression.rb +3 -5
  22. data/lib/regexp_parser/lexer.rb +30 -44
  23. data/lib/regexp_parser/parser.rb +47 -24
  24. data/lib/regexp_parser/scanner.rb +1228 -1367
  25. data/lib/regexp_parser/scanner/char_type.rl +0 -3
  26. data/lib/regexp_parser/scanner/properties/long.yml +15 -1
  27. data/lib/regexp_parser/scanner/properties/short.yml +5 -0
  28. data/lib/regexp_parser/scanner/scanner.rl +101 -194
  29. data/lib/regexp_parser/syntax/tokens.rb +2 -10
  30. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +30 -0
  31. data/lib/regexp_parser/syntax/versions/2.6.2.rb +10 -0
  32. data/lib/regexp_parser/syntax/versions/2.6.3.rb +10 -0
  33. data/lib/regexp_parser/version.rb +1 -1
  34. data/regexp_parser.gemspec +2 -2
  35. data/spec/expression/base_spec.rb +94 -0
  36. data/spec/expression/clone_spec.rb +120 -0
  37. data/spec/expression/conditional_spec.rb +89 -0
  38. data/spec/expression/free_space_spec.rb +27 -0
  39. data/spec/expression/methods/match_length_spec.rb +161 -0
  40. data/spec/expression/methods/match_spec.rb +25 -0
  41. data/spec/expression/methods/strfregexp_spec.rb +224 -0
  42. data/spec/expression/methods/tests_spec.rb +99 -0
  43. data/spec/expression/methods/traverse_spec.rb +161 -0
  44. data/spec/expression/options_spec.rb +128 -0
  45. data/spec/expression/root_spec.rb +9 -0
  46. data/spec/expression/sequence_spec.rb +9 -0
  47. data/spec/expression/subexpression_spec.rb +50 -0
  48. data/spec/expression/to_h_spec.rb +26 -0
  49. data/spec/expression/to_s_spec.rb +100 -0
  50. data/spec/lexer/all_spec.rb +22 -0
  51. data/spec/lexer/conditionals_spec.rb +53 -0
  52. data/spec/lexer/delimiters_spec.rb +68 -0
  53. data/spec/lexer/escapes_spec.rb +14 -0
  54. data/spec/lexer/keep_spec.rb +10 -0
  55. data/spec/lexer/literals_spec.rb +89 -0
  56. data/spec/lexer/nesting_spec.rb +99 -0
  57. data/spec/lexer/refcalls_spec.rb +55 -0
  58. data/spec/parser/all_spec.rb +43 -0
  59. data/spec/parser/alternation_spec.rb +88 -0
  60. data/spec/parser/anchors_spec.rb +17 -0
  61. data/spec/parser/conditionals_spec.rb +179 -0
  62. data/spec/parser/errors_spec.rb +30 -0
  63. data/spec/parser/escapes_spec.rb +121 -0
  64. data/spec/parser/free_space_spec.rb +130 -0
  65. data/spec/parser/groups_spec.rb +108 -0
  66. data/spec/parser/keep_spec.rb +6 -0
  67. data/spec/parser/posix_classes_spec.rb +8 -0
  68. data/spec/parser/properties_spec.rb +115 -0
  69. data/spec/parser/quantifiers_spec.rb +52 -0
  70. data/spec/parser/refcalls_spec.rb +112 -0
  71. data/spec/parser/set/intersections_spec.rb +127 -0
  72. data/spec/parser/set/ranges_spec.rb +111 -0
  73. data/spec/parser/sets_spec.rb +178 -0
  74. data/spec/parser/types_spec.rb +18 -0
  75. data/spec/scanner/all_spec.rb +18 -0
  76. data/spec/scanner/anchors_spec.rb +21 -0
  77. data/spec/scanner/conditionals_spec.rb +128 -0
  78. data/spec/scanner/delimiters_spec.rb +52 -0
  79. data/spec/scanner/errors_spec.rb +67 -0
  80. data/spec/scanner/escapes_spec.rb +53 -0
  81. data/spec/scanner/free_space_spec.rb +133 -0
  82. data/spec/scanner/groups_spec.rb +52 -0
  83. data/spec/scanner/keep_spec.rb +10 -0
  84. data/spec/scanner/literals_spec.rb +49 -0
  85. data/spec/scanner/meta_spec.rb +18 -0
  86. data/spec/scanner/properties_spec.rb +64 -0
  87. data/spec/scanner/quantifiers_spec.rb +20 -0
  88. data/spec/scanner/refcalls_spec.rb +36 -0
  89. data/spec/scanner/sets_spec.rb +102 -0
  90. data/spec/scanner/types_spec.rb +14 -0
  91. data/spec/spec_helper.rb +15 -0
  92. data/{test → spec}/support/runner.rb +9 -8
  93. data/spec/support/shared_examples.rb +77 -0
  94. data/{test → spec}/support/warning_extractor.rb +5 -7
  95. data/spec/syntax/syntax_spec.rb +48 -0
  96. data/spec/syntax/syntax_token_map_spec.rb +23 -0
  97. data/spec/syntax/versions/1.8.6_spec.rb +17 -0
  98. data/spec/syntax/versions/1.9.1_spec.rb +10 -0
  99. data/spec/syntax/versions/1.9.3_spec.rb +9 -0
  100. data/spec/syntax/versions/2.0.0_spec.rb +13 -0
  101. data/spec/syntax/versions/2.2.0_spec.rb +9 -0
  102. data/spec/syntax/versions/aliases_spec.rb +37 -0
  103. data/spec/token/token_spec.rb +85 -0
  104. metadata +149 -144
  105. data/test/expression/test_all.rb +0 -12
  106. data/test/expression/test_base.rb +0 -90
  107. data/test/expression/test_clone.rb +0 -89
  108. data/test/expression/test_conditionals.rb +0 -113
  109. data/test/expression/test_free_space.rb +0 -35
  110. data/test/expression/test_set.rb +0 -84
  111. data/test/expression/test_strfregexp.rb +0 -230
  112. data/test/expression/test_subexpression.rb +0 -58
  113. data/test/expression/test_tests.rb +0 -99
  114. data/test/expression/test_to_h.rb +0 -59
  115. data/test/expression/test_to_s.rb +0 -104
  116. data/test/expression/test_traverse.rb +0 -161
  117. data/test/helpers.rb +0 -10
  118. data/test/lexer/test_all.rb +0 -41
  119. data/test/lexer/test_conditionals.rb +0 -127
  120. data/test/lexer/test_keep.rb +0 -24
  121. data/test/lexer/test_literals.rb +0 -130
  122. data/test/lexer/test_nesting.rb +0 -132
  123. data/test/lexer/test_refcalls.rb +0 -56
  124. data/test/parser/set/test_intersections.rb +0 -127
  125. data/test/parser/set/test_ranges.rb +0 -111
  126. data/test/parser/test_all.rb +0 -64
  127. data/test/parser/test_alternation.rb +0 -92
  128. data/test/parser/test_anchors.rb +0 -34
  129. data/test/parser/test_conditionals.rb +0 -187
  130. data/test/parser/test_errors.rb +0 -63
  131. data/test/parser/test_escapes.rb +0 -134
  132. data/test/parser/test_free_space.rb +0 -139
  133. data/test/parser/test_groups.rb +0 -289
  134. data/test/parser/test_keep.rb +0 -21
  135. data/test/parser/test_posix_classes.rb +0 -27
  136. data/test/parser/test_properties.rb +0 -134
  137. data/test/parser/test_quantifiers.rb +0 -301
  138. data/test/parser/test_refcalls.rb +0 -186
  139. data/test/parser/test_sets.rb +0 -179
  140. data/test/parser/test_types.rb +0 -50
  141. data/test/scanner/test_all.rb +0 -38
  142. data/test/scanner/test_anchors.rb +0 -38
  143. data/test/scanner/test_conditionals.rb +0 -184
  144. data/test/scanner/test_errors.rb +0 -91
  145. data/test/scanner/test_escapes.rb +0 -56
  146. data/test/scanner/test_free_space.rb +0 -200
  147. data/test/scanner/test_groups.rb +0 -79
  148. data/test/scanner/test_keep.rb +0 -35
  149. data/test/scanner/test_literals.rb +0 -89
  150. data/test/scanner/test_meta.rb +0 -40
  151. data/test/scanner/test_properties.rb +0 -312
  152. data/test/scanner/test_quantifiers.rb +0 -37
  153. data/test/scanner/test_refcalls.rb +0 -52
  154. data/test/scanner/test_scripts.rb +0 -53
  155. data/test/scanner/test_sets.rb +0 -119
  156. data/test/scanner/test_types.rb +0 -35
  157. data/test/scanner/test_unicode_blocks.rb +0 -30
  158. data/test/support/disable_autotest.rb +0 -8
  159. data/test/syntax/test_all.rb +0 -6
  160. data/test/syntax/test_syntax.rb +0 -61
  161. data/test/syntax/test_syntax_token_map.rb +0 -25
  162. data/test/syntax/versions/test_1.8.rb +0 -55
  163. data/test/syntax/versions/test_1.9.1.rb +0 -36
  164. data/test/syntax/versions/test_1.9.3.rb +0 -32
  165. data/test/syntax/versions/test_2.0.0.rb +0 -37
  166. data/test/syntax/versions/test_2.2.0.rb +0 -32
  167. data/test/syntax/versions/test_aliases.rb +0 -129
  168. data/test/syntax/versions/test_all.rb +0 -5
  169. data/test/test_all.rb +0 -5
  170. data/test/token/test_all.rb +0 -2
  171. data/test/token/test_token.rb +0 -107
@@ -0,0 +1,88 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Alternation parsing') do
4
+ let(:root) { RP.parse('(ab??|cd*|ef+)*|(gh|ij|kl)?') }
5
+
6
+ specify('parse alternation root') do
7
+ e = root[0]
8
+ expect(e).to be_a(Alternation)
9
+ end
10
+
11
+ specify('parse alternation alts') do
12
+ alts = root[0].alternatives
13
+
14
+ expect(alts[0]).to be_a(Alternative)
15
+ expect(alts[1]).to be_a(Alternative)
16
+
17
+ expect(alts[0][0]).to be_a(Group::Capture)
18
+ expect(alts[1][0]).to be_a(Group::Capture)
19
+
20
+ expect(alts.length).to eq 2
21
+ end
22
+
23
+ specify('parse alternation nested') do
24
+ e = root[0].alternatives[0][0][0]
25
+
26
+ expect(e).to be_a(Alternation)
27
+ end
28
+
29
+ specify('parse alternation nested sequence') do
30
+ alts = root[0][0]
31
+ nested = alts[0][0][0]
32
+
33
+ expect(nested).to be_a(Alternative)
34
+
35
+ expect(nested[0]).to be_a(Literal)
36
+ expect(nested[1]).to be_a(Literal)
37
+ expect(nested.expressions.length).to eq 2
38
+ end
39
+
40
+ specify('parse alternation nested groups') do
41
+ root = RP.parse('(i|ey|([ougfd]+)|(ney))')
42
+
43
+ alts = root[0][0].alternatives
44
+ expect(alts.length).to eq 4
45
+ end
46
+
47
+ specify('parse alternation grouped alts') do
48
+ root = RP.parse('ca((n)|(t)|(ll)|(b))')
49
+
50
+ alts = root[1][0].alternatives
51
+
52
+ expect(alts.length).to eq 4
53
+
54
+ expect(alts[0]).to be_a(Alternative)
55
+ expect(alts[1]).to be_a(Alternative)
56
+ expect(alts[2]).to be_a(Alternative)
57
+ expect(alts[3]).to be_a(Alternative)
58
+ end
59
+
60
+ specify('parse alternation nested grouped alts') do
61
+ root = RP.parse('ca((n|t)|(ll|b))')
62
+
63
+ alts = root[1][0].alternatives
64
+
65
+ expect(alts.length).to eq 2
66
+
67
+ expect(alts[0]).to be_a(Alternative)
68
+ expect(alts[1]).to be_a(Alternative)
69
+
70
+ subalts = root[1][0][0][0][0].alternatives
71
+
72
+ expect(subalts.length).to eq 2
73
+
74
+ expect(subalts[0]).to be_a(Alternative)
75
+ expect(subalts[1]).to be_a(Alternative)
76
+ end
77
+
78
+ specify('parse alternation continues after nesting') do
79
+ root = RP.parse(/a|(b)c/)
80
+
81
+ seq = root[0][1].expressions
82
+
83
+ expect(seq.length).to eq 2
84
+
85
+ expect(seq[0]).to be_a(Group::Capture)
86
+ expect(seq[1]).to be_a(Literal)
87
+ end
88
+ end
@@ -0,0 +1,17 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Anchor parsing') do
4
+ include_examples 'parse', /^a/, 0 => [:anchor, :bol, Anchor::BOL]
5
+ include_examples 'parse', /a$/, 1 => [:anchor, :eol, Anchor::EOL]
6
+
7
+ include_examples 'parse', /\Aa/, 0 => [:anchor, :bos, Anchor::BOS]
8
+ include_examples 'parse', /a\z/, 1 => [:anchor, :eos, Anchor::EOS]
9
+ include_examples 'parse', /a\Z/, 1 => [:anchor, :eos_ob_eol, Anchor::EOSobEOL]
10
+
11
+ include_examples 'parse', /a\b/, 1 => [:anchor, :word_boundary, Anchor::WordBoundary]
12
+ include_examples 'parse', /a\B/, 1 => [:anchor, :nonword_boundary, Anchor::NonWordBoundary]
13
+
14
+ include_examples 'parse', /a\G/, 1 => [:anchor, :match_start, Anchor::MatchStart]
15
+
16
+ include_examples 'parse', /\\A/, 0 => [:escape, :backslash, EscapeSequence::Literal]
17
+ end
@@ -0,0 +1,179 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Conditional parsing') do
4
+ specify('parse conditional') do
5
+ regexp = /(?<A>a)(?(<A>)T|F)/
6
+
7
+ root = RP.parse(regexp, 'ruby/2.0')
8
+ exp = root[1]
9
+
10
+ expect(exp).to be_a(Conditional::Expression)
11
+
12
+ expect(exp.type).to eq :conditional
13
+ expect(exp.token).to eq :open
14
+ expect(exp.to_s).to eq '(?(<A>)T|F)'
15
+ expect(exp.reference).to eq 'A'
16
+ end
17
+
18
+ specify('parse conditional condition') do
19
+ regexp = /(?<A>a)(?(<A>)T|F)/
20
+
21
+ root = RP.parse(regexp, 'ruby/2.0')
22
+ exp = root[1].condition
23
+
24
+ expect(exp).to be_a(Conditional::Condition)
25
+
26
+ expect(exp.type).to eq :conditional
27
+ expect(exp.token).to eq :condition
28
+ expect(exp.to_s).to eq '(<A>)'
29
+ expect(exp.reference).to eq 'A'
30
+ expect(exp.referenced_expression.to_s).to eq '(?<A>a)'
31
+ end
32
+
33
+ specify('parse conditional condition with number ref') do
34
+ regexp = /(a)(?(1)T|F)/
35
+
36
+ root = RP.parse(regexp, 'ruby/2.0')
37
+ exp = root[1].condition
38
+
39
+ expect(exp).to be_a(Conditional::Condition)
40
+
41
+ expect(exp.type).to eq :conditional
42
+ expect(exp.token).to eq :condition
43
+ expect(exp.to_s).to eq '(1)'
44
+ expect(exp.reference).to eq 1
45
+ expect(exp.referenced_expression.to_s).to eq '(a)'
46
+ end
47
+
48
+ specify('parse conditional nested groups') do
49
+ regexp = /((a)|(b)|((?(2)(c(d|e)+)?|(?(3)f|(?(4)(g|(h)(i)))))))/
50
+
51
+ root = RP.parse(regexp, 'ruby/2.0')
52
+
53
+ expect(root.to_s).to eq regexp.source
54
+
55
+ group = root.first
56
+ expect(group).to be_instance_of(Group::Capture)
57
+
58
+ alt = group.first
59
+ expect(alt).to be_instance_of(Alternation)
60
+ expect(alt.length).to eq 3
61
+
62
+ expect(alt.map(&:first)).to all(be_a Group::Capture)
63
+
64
+ subgroup = alt[2].first
65
+ conditional = subgroup.first
66
+
67
+ expect(conditional).to be_instance_of(Conditional::Expression)
68
+ expect(conditional.length).to eq 3
69
+
70
+ expect(conditional[0]).to be_instance_of(Conditional::Condition)
71
+ expect(conditional[0].to_s).to eq '(2)'
72
+
73
+ condition = conditional.condition
74
+ expect(condition).to be_instance_of(Conditional::Condition)
75
+ expect(condition.to_s).to eq '(2)'
76
+
77
+ branches = conditional.branches
78
+ expect(branches.length).to eq 2
79
+ expect(branches).to be_instance_of(Array)
80
+ end
81
+
82
+ specify('parse conditional nested') do
83
+ regexp = /(a(b(c(d)(e))))(?(1)(?(2)d|(?(3)e|f))|(?(4)(?(5)g|h)))/
84
+
85
+ root = RP.parse(regexp, 'ruby/2.0')
86
+
87
+ expect(root.to_s).to eq regexp.source
88
+
89
+ {
90
+ 1 => [2, root[1]],
91
+ 2 => [2, root[1][1][0]],
92
+ 3 => [2, root[1][1][0][2][0]],
93
+ 4 => [1, root[1][2][0]],
94
+ 5 => [2, root[1][2][0][1][0]]
95
+ }.each do |index, example|
96
+ branch_count, exp = example
97
+
98
+ expect(exp).to be_instance_of(Conditional::Expression)
99
+ expect(exp.condition.to_s).to eq "(#{index})"
100
+ expect(exp.branches.length).to eq branch_count
101
+ end
102
+ end
103
+
104
+ specify('parse conditional nested alternation') do
105
+ regexp = /(a)(?(1)(b|c|d)|(e|f|g))(h)(?(2)(i|j|k)|(l|m|n))|o|p/
106
+
107
+ root = RP.parse(regexp, 'ruby/2.0')
108
+
109
+ expect(root.to_s).to eq regexp.source
110
+
111
+ expect(root.first).to be_instance_of(Alternation)
112
+
113
+ [
114
+ [3, 'b|c|d', root[0][0][1][1][0][0]],
115
+ [3, 'e|f|g', root[0][0][1][2][0][0]],
116
+ [3, 'i|j|k', root[0][0][3][1][0][0]],
117
+ [3, 'l|m|n', root[0][0][3][2][0][0]]
118
+ ].each do |example|
119
+ alt_count, alt_text, exp = example
120
+
121
+ expect(exp).to be_instance_of(Alternation)
122
+ expect(exp.to_s).to eq alt_text
123
+ expect(exp.alternatives.length).to eq alt_count
124
+ end
125
+ end
126
+
127
+ specify('parse conditional extra separator') do
128
+ regexp = /(?<A>a)(?(<A>)T|)/
129
+
130
+ root = RP.parse(regexp, 'ruby/2.0')
131
+ branches = root[1].branches
132
+
133
+ expect(branches.length).to eq 2
134
+
135
+ seq_1, seq_2 = branches
136
+
137
+ [seq_1, seq_2].each do |seq|
138
+ expect(seq).to be_a(Sequence)
139
+
140
+ expect(seq.type).to eq :expression
141
+ expect(seq.token).to eq :sequence
142
+ end
143
+
144
+ expect(seq_1.to_s).to eq 'T'
145
+ expect(seq_2.to_s).to eq ''
146
+ end
147
+
148
+ specify('parse conditional quantified') do
149
+ regexp = /(foo)(?(1)\d|(\w)){42}/
150
+
151
+ root = RP.parse(regexp, 'ruby/2.0')
152
+ conditional = root[1]
153
+
154
+ expect(conditional).to be_quantified
155
+ expect(conditional.quantifier.to_s).to eq '{42}'
156
+ expect(conditional.to_s).to eq '(?(1)\\d|(\\w)){42}'
157
+ expect(conditional.branches.any?(&:quantified?)).to be false
158
+ end
159
+
160
+ specify('parse conditional branch content quantified') do
161
+ regexp = /(foo)(?(1)\d{23}|(\w){42})/
162
+
163
+ root = RP.parse(regexp, 'ruby/2.0')
164
+ conditional = root[1]
165
+
166
+ expect(conditional).not_to be_quantified
167
+ expect(conditional.branches.any?(&:quantified?)).to be false
168
+ expect(conditional.branches[0][0]).to be_quantified
169
+ expect(conditional.branches[0][0].quantifier.to_s).to eq '{23}'
170
+ expect(conditional.branches[1][0]).to be_quantified
171
+ expect(conditional.branches[1][0].quantifier.to_s).to eq '{42}'
172
+ end
173
+
174
+ specify('parse conditional excessive branches') do
175
+ regexp = '(?<A>a)(?(<A>)T|F|X)'
176
+
177
+ expect { RP.parse(regexp, 'ruby/2.0') }.to raise_error(Conditional::TooManyBranches)
178
+ end
179
+ end
@@ -0,0 +1,30 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('Parsing errors') do
4
+ let(:parser) { Regexp::Parser.new }
5
+ before { parser.parse(/foo/) } # initializes ivars
6
+
7
+ it('raises UnknownTokenTypeError for unknown token types') do
8
+ expect { parser.send(:parse_token, Regexp::Token.new(:foo, :bar)) }
9
+ .to raise_error(Regexp::Parser::UnknownTokenTypeError)
10
+ end
11
+
12
+ RSpec.shared_examples 'UnknownTokenError' do |type, token|
13
+ it "raises for unknown tokens of type #{type}" do
14
+ expect { parser.send(:parse_token, Regexp::Token.new(type, :foo)) }
15
+ .to raise_error(Regexp::Parser::UnknownTokenError)
16
+ end
17
+ end
18
+
19
+ include_examples 'UnknownTokenError', :anchor
20
+ include_examples 'UnknownTokenError', :backref
21
+ include_examples 'UnknownTokenError', :conditional
22
+ include_examples 'UnknownTokenError', :free_space
23
+ include_examples 'UnknownTokenError', :group
24
+ include_examples 'UnknownTokenError', :meta
25
+ include_examples 'UnknownTokenError', :nonproperty
26
+ include_examples 'UnknownTokenError', :property
27
+ include_examples 'UnknownTokenError', :quantifier
28
+ include_examples 'UnknownTokenError', :set
29
+ include_examples 'UnknownTokenError', :type
30
+ end
@@ -0,0 +1,121 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('EscapeSequence parsing') do
4
+ include_examples 'parse', /a\ac/, 1 => [:escape, :bell, EscapeSequence::Bell]
5
+ include_examples 'parse', /a\ec/, 1 => [:escape, :escape, EscapeSequence::AsciiEscape]
6
+ include_examples 'parse', /a\fc/, 1 => [:escape, :form_feed, EscapeSequence::FormFeed]
7
+ include_examples 'parse', /a\nc/, 1 => [:escape, :newline, EscapeSequence::Newline]
8
+ include_examples 'parse', /a\rc/, 1 => [:escape, :carriage, EscapeSequence::Return]
9
+ include_examples 'parse', /a\tc/, 1 => [:escape, :tab, EscapeSequence::Tab]
10
+ include_examples 'parse', /a\vc/, 1 => [:escape, :vertical_tab, EscapeSequence::VerticalTab]
11
+
12
+ # meta character escapes
13
+ include_examples 'parse', /a\.c/, 1 => [:escape, :dot, EscapeSequence::Literal]
14
+ include_examples 'parse', /a\?c/, 1 => [:escape, :zero_or_one, EscapeSequence::Literal]
15
+ include_examples 'parse', /a\*c/, 1 => [:escape, :zero_or_more, EscapeSequence::Literal]
16
+ include_examples 'parse', /a\+c/, 1 => [:escape, :one_or_more, EscapeSequence::Literal]
17
+ include_examples 'parse', /a\|c/, 1 => [:escape, :alternation, EscapeSequence::Literal]
18
+ include_examples 'parse', /a\(c/, 1 => [:escape, :group_open, EscapeSequence::Literal]
19
+ include_examples 'parse', /a\)c/, 1 => [:escape, :group_close, EscapeSequence::Literal]
20
+ include_examples 'parse', /a\{c/, 1 => [:escape, :interval_open, EscapeSequence::Literal]
21
+ include_examples 'parse', /a\}c/, 1 => [:escape, :interval_close, EscapeSequence::Literal]
22
+
23
+ # unicode escapes
24
+ include_examples 'parse', /a\u0640/, 1 => [:escape, :codepoint, EscapeSequence::Codepoint]
25
+ include_examples 'parse', /a\u{41 1F60D}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
26
+ include_examples 'parse', /a\u{10FFFF}/, 1 => [:escape, :codepoint_list, EscapeSequence::CodepointList]
27
+
28
+ # hex escapes
29
+ include_examples 'parse', /a\xFF/n, 1 => [:escape, :hex, EscapeSequence::Hex]
30
+
31
+ # octal escapes
32
+ include_examples 'parse', /a\177/n, 1 => [:escape, :octal, EscapeSequence::Octal]
33
+
34
+ specify('parse chars and codepoints') do
35
+ root = RP.parse(/\n\?\101\x42\u0043\u{44 45}/)
36
+
37
+ expect(root[0].char).to eq "\n"
38
+ expect(root[0].codepoint).to eq 10
39
+
40
+ expect(root[1].char).to eq '?'
41
+ expect(root[1].codepoint).to eq 63
42
+
43
+ expect(root[2].char).to eq 'A'
44
+ expect(root[2].codepoint).to eq 65
45
+
46
+ expect(root[3].char).to eq 'B'
47
+ expect(root[3].codepoint).to eq 66
48
+
49
+ expect(root[4].char).to eq 'C'
50
+ expect(root[4].codepoint).to eq 67
51
+
52
+ expect(root[5].chars).to eq %w[D E]
53
+ expect(root[5].codepoints).to eq [68, 69]
54
+
55
+ expect { root[5].char }.to raise_error(/#chars/)
56
+ expect { root[5].codepoint }.to raise_error(/#codepoints/)
57
+ end
58
+
59
+ specify('parse escape control sequence lower') do
60
+ root = RP.parse(/a\\\c2b/)
61
+
62
+ expect(root[2]).to be_instance_of(EscapeSequence::Control)
63
+ expect(root[2].text).to eq '\\c2'
64
+ expect(root[2].char).to eq "\x12"
65
+ expect(root[2].codepoint).to eq 18
66
+ end
67
+
68
+ specify('parse escape control sequence upper') do
69
+ root = RP.parse(/\d\\\C-C\w/)
70
+
71
+ expect(root[2]).to be_instance_of(EscapeSequence::Control)
72
+ expect(root[2].text).to eq '\\C-C'
73
+ expect(root[2].char).to eq "\x03"
74
+ expect(root[2].codepoint).to eq 3
75
+ end
76
+
77
+ specify('parse escape meta sequence') do
78
+ root = RP.parse(/\Z\\\M-Z/n)
79
+
80
+ expect(root[2]).to be_instance_of(EscapeSequence::Meta)
81
+ expect(root[2].text).to eq '\\M-Z'
82
+ expect(root[2].char).to eq "\u00DA"
83
+ expect(root[2].codepoint).to eq 218
84
+ end
85
+
86
+ specify('parse escape meta control sequence') do
87
+ root = RP.parse(/\A\\\M-\C-X/n)
88
+
89
+ expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
90
+ expect(root[2].text).to eq '\\M-\\C-X'
91
+ expect(root[2].char).to eq "\u0098"
92
+ expect(root[2].codepoint).to eq 152
93
+ end
94
+
95
+ specify('parse lower c meta control sequence') do
96
+ root = RP.parse(/\A\\\M-\cX/n)
97
+
98
+ expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
99
+ expect(root[2].text).to eq '\\M-\\cX'
100
+ expect(root[2].char).to eq "\u0098"
101
+ expect(root[2].codepoint).to eq 152
102
+ end
103
+
104
+ specify('parse escape reverse meta control sequence') do
105
+ root = RP.parse(/\A\\\C-\M-X/n)
106
+
107
+ expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
108
+ expect(root[2].text).to eq '\\C-\\M-X'
109
+ expect(root[2].char).to eq "\u0098"
110
+ expect(root[2].codepoint).to eq 152
111
+ end
112
+
113
+ specify('parse escape reverse lower c meta control sequence') do
114
+ root = RP.parse(/\A\\\c\M-X/n)
115
+
116
+ expect(root[2]).to be_instance_of(EscapeSequence::MetaControl)
117
+ expect(root[2].text).to eq '\\c\\M-X'
118
+ expect(root[2].char).to eq "\u0098"
119
+ expect(root[2].codepoint).to eq 152
120
+ end
121
+ end
@@ -0,0 +1,130 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe('FreeSpace parsing') do
4
+ specify('parse free space spaces') do
5
+ regexp = /a ? b * c + d{2,4}/x
6
+ root = RP.parse(regexp)
7
+
8
+ 0.upto(6) do |i|
9
+ if i.odd?
10
+ expect(root[i]).to be_instance_of(WhiteSpace)
11
+ expect(root[i].text).to eq ' '
12
+ else
13
+ expect(root[i]).to be_instance_of(Literal)
14
+ expect(root[i]).to be_quantified
15
+ end
16
+ end
17
+ end
18
+
19
+ specify('parse non free space literals') do
20
+ regexp = /a b c d/
21
+ root = RP.parse(regexp)
22
+
23
+ expect(root.first).to be_instance_of(Literal)
24
+ expect(root.first.text).to eq 'a b c d'
25
+ end
26
+
27
+ specify('parse free space comments') do
28
+ regexp = /
29
+ a ? # One letter
30
+ b {2,5} # Another one
31
+ [c-g] + # A set
32
+ (h|i|j) | # A group
33
+ klm *
34
+ nop +
35
+ /x
36
+
37
+ root = RP.parse(regexp)
38
+
39
+ alt = root.first
40
+ expect(alt).to be_instance_of(Alternation)
41
+
42
+ alt_1 = alt.alternatives.first
43
+ expect(alt_1).to be_instance_of(Alternative)
44
+ expect(alt_1.length).to eq 15
45
+
46
+ [0, 2, 4, 6, 8, 12, 14].each do |i|
47
+ expect(alt_1[i]).to be_instance_of(WhiteSpace)
48
+ end
49
+
50
+ [3, 7, 11].each { |i| expect(alt_1[i].class).to eq Comment }
51
+
52
+ alt_2 = alt.alternatives.last
53
+ expect(alt_2).to be_instance_of(Alternative)
54
+ expect(alt_2.length).to eq 7
55
+
56
+ [0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
57
+
58
+ expect(alt_2[1]).to be_instance_of(Comment)
59
+ end
60
+
61
+ specify('parse free space nested comments') do
62
+ regexp = /
63
+ # Group one
64
+ (
65
+ abc # Comment one
66
+ \d? # Optional \d
67
+ )+
68
+
69
+ # Group two
70
+ (
71
+ def # Comment two
72
+ \s? # Optional \s
73
+ )?
74
+ /x
75
+
76
+ root = RP.parse(regexp)
77
+
78
+ top_comment_1 = root[1]
79
+ expect(top_comment_1).to be_instance_of(Comment)
80
+ expect(top_comment_1.text).to eq "# Group one\n"
81
+ expect(top_comment_1.starts_at).to eq 7
82
+
83
+ top_comment_2 = root[5]
84
+ expect(top_comment_2).to be_instance_of(Comment)
85
+ expect(top_comment_2.text).to eq "# Group two\n"
86
+ expect(top_comment_2.starts_at).to eq 95
87
+
88
+ [3, 7].each do |g|
89
+ group = root[g]
90
+
91
+ [3, 7].each do |c|
92
+ comment = group[c]
93
+ expect(comment).to be_instance_of(Comment)
94
+ expect(comment.text.length).to eq 14
95
+ end
96
+ end
97
+ end
98
+
99
+ specify('parse free space quantifiers') do
100
+ regexp = /
101
+ a
102
+ # comment 1
103
+ ?
104
+ (
105
+ b # comment 2
106
+ # comment 3
107
+ +
108
+ )
109
+ # comment 4
110
+ *
111
+ /x
112
+
113
+ root = RP.parse(regexp)
114
+
115
+ literal_1 = root[1]
116
+ expect(literal_1).to be_instance_of(Literal)
117
+ expect(literal_1).to be_quantified
118
+ expect(literal_1.quantifier.token).to eq :zero_or_one
119
+
120
+ group = root[5]
121
+ expect(group).to be_instance_of(Group::Capture)
122
+ expect(group).to be_quantified
123
+ expect(group.quantifier.token).to eq :zero_or_more
124
+
125
+ literal_2 = group[1]
126
+ expect(literal_2).to be_instance_of(Literal)
127
+ expect(literal_2).to be_quantified
128
+ expect(literal_2.quantifier.token).to eq :one_or_more
129
+ end
130
+ end