natalie_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
data/src/lexer.cpp ADDED
@@ -0,0 +1,1703 @@
1
+ #include <errno.h>
2
+ #include <limits>
3
+ #include <stdlib.h>
4
+
5
+ #include "natalie_parser/lexer.hpp"
6
+ #include "natalie_parser/lexer/interpolated_string_lexer.hpp"
7
+ #include "natalie_parser/lexer/regexp_lexer.hpp"
8
+ #include "natalie_parser/lexer/word_array_lexer.hpp"
9
+ #include "natalie_parser/token.hpp"
10
+
11
+ namespace NatalieParser {
12
+
13
+ SharedPtr<Vector<Token>> Lexer::tokens() {
14
+ SharedPtr<Vector<Token>> tokens = new Vector<Token> {};
15
+ bool skip_next_newline = false;
16
+ Token last_doc_token;
17
+ for (;;) {
18
+ auto token = next_token();
19
+ if (token.is_comment())
20
+ continue;
21
+
22
+ if (token.is_doc()) {
23
+ if (last_doc_token)
24
+ last_doc_token.literal_string()->append(*token.literal_string());
25
+ else
26
+ last_doc_token = token;
27
+ continue;
28
+ }
29
+
30
+ // get rid of newlines after certain tokens
31
+ if (skip_next_newline) {
32
+ if (token.is_newline())
33
+ continue;
34
+ else
35
+ skip_next_newline = false;
36
+ }
37
+
38
+ // get rid of newlines before certain tokens
39
+ while (token.can_follow_collapsible_newline() && !tokens->is_empty() && tokens->last().is_newline())
40
+ tokens->pop();
41
+
42
+ if (last_doc_token) {
43
+ if (token.can_have_doc()) {
44
+ token.set_doc(last_doc_token.literal_string());
45
+ last_doc_token = {};
46
+ } else if (!token.is_end_of_line()) {
47
+ last_doc_token = {};
48
+ }
49
+ }
50
+
51
+ tokens->push(token);
52
+
53
+ m_last_token = token;
54
+
55
+ if (token.is_eof())
56
+ return tokens;
57
+ if (!token.is_valid())
58
+ return tokens;
59
+ if (token.can_precede_collapsible_newline())
60
+ skip_next_newline = true;
61
+ };
62
+ TM_UNREACHABLE();
63
+ }
64
+
65
+ Token Lexer::next_token() {
66
+ if (m_nested_lexer) {
67
+ auto token = m_nested_lexer->next_token();
68
+ if (token.is_eof()) {
69
+ if (m_nested_lexer->alters_parent_cursor_position()) {
70
+ m_index = m_nested_lexer->m_index;
71
+ m_cursor_line = m_nested_lexer->m_cursor_line;
72
+ m_cursor_column = m_nested_lexer->m_cursor_column;
73
+ }
74
+ delete m_nested_lexer;
75
+ m_nested_lexer = nullptr;
76
+ } else {
77
+ return token;
78
+ }
79
+ }
80
+ m_whitespace_precedes = skip_whitespace();
81
+ m_token_line = m_cursor_line;
82
+ m_token_column = m_cursor_column;
83
+ return build_next_token();
84
+ }
85
+
86
// True when `c` can appear inside a Ruby identifier: ASCII alphanumerics,
// underscore, or any byte >= 0x80 (part of a multi-byte UTF-8 character).
// A NUL byte (end-of-input sentinel) is never an identifier character.
bool is_identifier_char(char c) {
    if (!c) return false;
    // Route through unsigned char: calling isalnum() with a negative char
    // value (any byte >= 0x80 on signed-char platforms) is undefined
    // behavior. Checking the >= 128 case first both fixes the UB and
    // preserves the original "high bytes are identifier chars" result.
    auto uc = static_cast<unsigned char>(c);
    return uc >= 128 || isalnum(uc) || c == '_';
}

// True for the '?' / '!' suffix characters that may end a method name.
bool is_message_suffix(char c) {
    if (!c) return false;
    return c == '?' || c == '!';
}

// True when `c` could continue an identifier-like method name,
// including a trailing '?' or '!'.
bool is_identifier_char_or_message_suffix(char c) {
    return is_identifier_char(c) || is_message_suffix(c);
}
99
+
100
+ bool Lexer::match(size_t bytes, const char *compare) {
101
+ if (m_index + bytes > m_size)
102
+ return false;
103
+ if (strncmp(compare, m_input->c_str() + m_index, bytes) == 0) {
104
+ if (m_index + bytes < m_size && is_identifier_char_or_message_suffix(m_input->at(m_index + bytes)))
105
+ return false;
106
+ advance(bytes);
107
+ return true;
108
+ }
109
+ return false;
110
+ }
111
+
112
+ void Lexer::advance() {
113
+ auto c = current_char();
114
+ m_index++;
115
+ if (c == '\n') {
116
+ m_cursor_line++;
117
+ m_cursor_column = 0;
118
+ } else {
119
+ m_cursor_column++;
120
+ }
121
+ }
122
+
123
+ void Lexer::advance(size_t bytes) {
124
+ for (size_t i = 0; i < bytes; i++) {
125
+ advance();
126
+ }
127
+ }
128
+
129
// NOTE: this does not work across lines
// Move the cursor back `bytes` characters on the current line only:
// the column is decremented with no newline accounting, so rewinding
// past a '\n' would leave m_cursor_line/m_cursor_column inconsistent.
void Lexer::rewind(size_t bytes) {
    // NOTE(review): this call discards its result and looks side-effect
    // free from here — presumably kept for an effect inside current_char();
    // confirm against its definition before removing.
    current_char();
    m_cursor_column -= bytes;
    m_index -= bytes;
}
135
+
136
+ bool Lexer::skip_whitespace() {
137
+ bool whitespace_found = false;
138
+ char c = current_char();
139
+ while (c == ' ' || c == '\t' || (c == '\\' && peek() == '\n')) {
140
+ whitespace_found = true;
141
+ advance();
142
+ if (c == '\\') advance();
143
+ c = current_char();
144
+ }
145
+ return whitespace_found;
146
+ }
147
+
148
+ Token Lexer::build_next_token() {
149
+ if (m_index >= m_size)
150
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
151
+ if (m_start_char && current_char() == m_start_char) {
152
+ m_pair_depth++;
153
+ } else if (m_stop_char && current_char() == m_stop_char) {
154
+ if (m_pair_depth == 0)
155
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
156
+ m_pair_depth--;
157
+ } else if (m_index == 0 && current_char() == '\xEF') {
158
+ // UTF-8 BOM
159
+ advance(); // \xEF
160
+ if (current_char() == '\xBB') advance();
161
+ if (current_char() == '\xBF') advance();
162
+ }
163
+ Token token;
164
+ switch (current_char()) {
165
+ case '=': {
166
+ advance();
167
+ switch (current_char()) {
168
+ case '=': {
169
+ advance();
170
+ switch (current_char()) {
171
+ case '=': {
172
+ advance();
173
+ return Token { Token::Type::EqualEqualEqual, m_file, m_token_line, m_token_column };
174
+ }
175
+ default:
176
+ return Token { Token::Type::EqualEqual, m_file, m_token_line, m_token_column };
177
+ }
178
+ }
179
+ case '>':
180
+ advance();
181
+ return Token { Token::Type::HashRocket, m_file, m_token_line, m_token_column };
182
+ case '~':
183
+ advance();
184
+ return Token { Token::Type::Match, m_file, m_token_line, m_token_column };
185
+ default:
186
+ if (m_cursor_column == 1 && match(5, "begin")) {
187
+ SharedPtr<String> doc = new String("=begin");
188
+ char c = current_char();
189
+ do {
190
+ doc->append_char(c);
191
+ c = next();
192
+ } while (c && !(m_cursor_column == 0 && match(4, "=end")));
193
+ doc->append("=end\n");
194
+ return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
195
+ }
196
+ auto token = Token { Token::Type::Equal, m_file, m_token_line, m_token_column };
197
+ token.set_whitespace_precedes(m_whitespace_precedes);
198
+ return token;
199
+ }
200
+ }
201
+ case '+':
202
+ advance();
203
+ switch (current_char()) {
204
+ case '=':
205
+ advance();
206
+ return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column };
207
+ case '@':
208
+ if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
209
+ advance();
210
+ SharedPtr<String> lit = new String("+@");
211
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
212
+ } else {
213
+ return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
214
+ }
215
+ default:
216
+ return Token { Token::Type::Plus, m_file, m_token_line, m_token_column };
217
+ }
218
+ case '-':
219
+ advance();
220
+ switch (current_char()) {
221
+ case '>':
222
+ advance();
223
+ return Token { Token::Type::Arrow, m_file, m_token_line, m_token_column };
224
+ case '=':
225
+ advance();
226
+ return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column };
227
+ case '@':
228
+ if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
229
+ advance();
230
+ SharedPtr<String> lit = new String("-@");
231
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
232
+ } else {
233
+ return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
234
+ }
235
+ default:
236
+ return Token { Token::Type::Minus, m_file, m_token_line, m_token_column };
237
+ }
238
+ case '*':
239
+ advance();
240
+ switch (current_char()) {
241
+ case '*':
242
+ advance();
243
+ switch (current_char()) {
244
+ case '=':
245
+ advance();
246
+ return Token { Token::Type::StarStarEqual, m_file, m_token_line, m_token_column };
247
+ default:
248
+ return Token { Token::Type::StarStar, m_file, m_token_line, m_token_column };
249
+ }
250
+ case '=':
251
+ advance();
252
+ return Token { Token::Type::StarEqual, m_file, m_token_line, m_token_column };
253
+ default:
254
+ return Token { Token::Type::Star, m_file, m_token_line, m_token_column };
255
+ }
256
+ case '/': {
257
+ advance();
258
+ if (!m_last_token)
259
+ return consume_regexp('/', '/');
260
+ switch (m_last_token.type()) {
261
+ case Token::Type::Comma:
262
+ case Token::Type::Doc:
263
+ case Token::Type::LBracket:
264
+ case Token::Type::LCurlyBrace:
265
+ case Token::Type::LParen:
266
+ case Token::Type::Match:
267
+ case Token::Type::Newline:
268
+ return consume_regexp('/', '/');
269
+ case Token::Type::DefKeyword:
270
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
271
+ default: {
272
+ switch (current_char()) {
273
+ case ' ':
274
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
275
+ case '=':
276
+ advance();
277
+ return Token { Token::Type::SlashEqual, m_file, m_token_line, m_token_column };
278
+ default:
279
+ if (m_whitespace_precedes) {
280
+ return consume_regexp('/', '/');
281
+ } else {
282
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column };
283
+ }
284
+ }
285
+ }
286
+ }
287
+ }
288
+ case '%':
289
+ advance();
290
+ switch (current_char()) {
291
+ case '=':
292
+ advance();
293
+ return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column };
294
+ case 'q':
295
+ switch (peek()) {
296
+ case '[':
297
+ advance(2);
298
+ return consume_single_quoted_string('[', ']');
299
+ case '{':
300
+ advance(2);
301
+ return consume_single_quoted_string('{', '}');
302
+ case '<':
303
+ advance(2);
304
+ return consume_single_quoted_string('<', '>');
305
+ case '(':
306
+ advance(2);
307
+ return consume_single_quoted_string('(', ')');
308
+ default: {
309
+ char c = peek();
310
+ if (char_can_be_string_or_regexp_delimiter(c)) {
311
+ advance(2);
312
+ return consume_single_quoted_string(c, c);
313
+ } else {
314
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
315
+ }
316
+ }
317
+ }
318
+ case 'Q':
319
+ switch (peek()) {
320
+ case '[':
321
+ advance(2);
322
+ return consume_double_quoted_string('[', ']');
323
+ case '{':
324
+ advance(2);
325
+ return consume_double_quoted_string('{', '}');
326
+ case '<':
327
+ advance(2);
328
+ return consume_double_quoted_string('<', '>');
329
+ case '(':
330
+ advance(2);
331
+ return consume_double_quoted_string('(', ')');
332
+ default: {
333
+ char c = peek();
334
+ if (char_can_be_string_or_regexp_delimiter(c)) {
335
+ advance(2);
336
+ return consume_double_quoted_string(c, c);
337
+ } else {
338
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
339
+ }
340
+ }
341
+ }
342
+ case 'r':
343
+ switch (peek()) {
344
+ case '[':
345
+ advance(2);
346
+ return consume_regexp('[', ']');
347
+ case '{':
348
+ advance(2);
349
+ return consume_regexp('{', '}');
350
+ case '(':
351
+ advance(2);
352
+ return consume_regexp('(', ')');
353
+ case '<':
354
+ advance(2);
355
+ return consume_regexp('<', '>');
356
+ default: {
357
+ char c = peek();
358
+ if (char_can_be_string_or_regexp_delimiter(c)) {
359
+ advance(2);
360
+ return consume_regexp(c, c);
361
+ } else {
362
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
363
+ }
364
+ }
365
+ }
366
+ case 'x':
367
+ switch (peek()) {
368
+ case '/': {
369
+ advance(2);
370
+ return consume_double_quoted_string('/', '/', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
371
+ }
372
+ case '[': {
373
+ advance(2);
374
+ return consume_double_quoted_string('[', ']', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
375
+ }
376
+ case '{': {
377
+ advance(2);
378
+ return consume_double_quoted_string('{', '}', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
379
+ }
380
+ case '(': {
381
+ advance(2);
382
+ return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
383
+ }
384
+ default:
385
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
386
+ }
387
+ case 'w':
388
+ switch (peek()) {
389
+ case '/':
390
+ case '|': {
391
+ char c = next();
392
+ advance();
393
+ return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerW);
394
+ }
395
+ case '[':
396
+ advance(2);
397
+ return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerW);
398
+ case '{':
399
+ advance(2);
400
+ return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerW);
401
+ case '<':
402
+ advance(2);
403
+ return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerW);
404
+ case '(':
405
+ advance(2);
406
+ return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
407
+ default:
408
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
409
+ }
410
+ case 'W':
411
+ switch (peek()) {
412
+ case '/':
413
+ case '|': {
414
+ char c = next();
415
+ advance();
416
+ return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperW);
417
+ }
418
+ case '[':
419
+ advance(2);
420
+ return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperW);
421
+ case '{':
422
+ advance(2);
423
+ return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperW);
424
+ case '<':
425
+ advance(2);
426
+ return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperW);
427
+ case '(':
428
+ advance(2);
429
+ return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
430
+ default:
431
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
432
+ }
433
+ case 'i':
434
+ switch (peek()) {
435
+ case '|':
436
+ case '/': {
437
+ char c = next();
438
+ advance();
439
+ return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerI);
440
+ }
441
+ case '[':
442
+ advance(2);
443
+ return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerI);
444
+ case '{':
445
+ advance(2);
446
+ return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerI);
447
+ case '<':
448
+ advance(2);
449
+ return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerI);
450
+ case '(':
451
+ advance(2);
452
+ return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
453
+ default:
454
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
455
+ }
456
+ case 'I':
457
+ switch (peek()) {
458
+ case '|':
459
+ case '/': {
460
+ char c = next();
461
+ advance();
462
+ return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperI);
463
+ }
464
+ case '[':
465
+ advance(2);
466
+ return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperI);
467
+ case '{':
468
+ advance(2);
469
+ return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperI);
470
+ case '<':
471
+ advance(2);
472
+ return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperI);
473
+ case '(':
474
+ advance(2);
475
+ return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
476
+ default:
477
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
478
+ }
479
+ case '[':
480
+ advance();
481
+ return consume_double_quoted_string('[', ']');
482
+ case '{':
483
+ advance();
484
+ return consume_double_quoted_string('{', '}');
485
+ case '<':
486
+ advance();
487
+ return consume_double_quoted_string('<', '>');
488
+ case '(':
489
+ if (m_last_token.type() == Token::Type::DefKeyword || m_last_token.type() == Token::Type::Dot) {
490
+ // It's a trap! This looks like a %(string) but it's a method def/call!
491
+ break;
492
+ }
493
+ advance();
494
+ return consume_double_quoted_string('(', ')');
495
+ default: {
496
+ auto c = current_char();
497
+ if (char_can_be_string_or_regexp_delimiter(c)) {
498
+ advance();
499
+ return consume_double_quoted_string(c, c);
500
+ }
501
+ break;
502
+ }
503
+ }
504
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column };
505
+ case '!':
506
+ advance();
507
+ switch (current_char()) {
508
+ case '=':
509
+ advance();
510
+ return Token { Token::Type::NotEqual, m_file, m_token_line, m_token_column };
511
+ case '~':
512
+ advance();
513
+ return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column };
514
+ case '@':
515
+ if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
516
+ advance();
517
+ SharedPtr<String> lit = new String("!@");
518
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
519
+ } else {
520
+ return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
521
+ }
522
+ default:
523
+ return Token { Token::Type::Not, m_file, m_token_line, m_token_column };
524
+ }
525
+ case '<':
526
+ advance();
527
+ switch (current_char()) {
528
+ case '<': {
529
+ advance();
530
+ switch (current_char()) {
531
+ case '~':
532
+ case '-': {
533
+ auto next = peek();
534
+ if (isalpha(next))
535
+ return consume_heredoc();
536
+ switch (next) {
537
+ case '_':
538
+ case '"':
539
+ case '`':
540
+ case '\'':
541
+ return consume_heredoc();
542
+ default:
543
+ return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
544
+ }
545
+ }
546
+ case '=':
547
+ advance();
548
+ return Token { Token::Type::LeftShiftEqual, m_file, m_token_line, m_token_column };
549
+ default:
550
+ if (!m_whitespace_precedes) {
551
+ if (token_is_first_on_line())
552
+ return consume_heredoc();
553
+ else if (m_last_token.can_precede_heredoc_that_looks_like_left_shift_operator())
554
+ return consume_heredoc();
555
+ else
556
+ return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
557
+ }
558
+ if (isalpha(current_char()))
559
+ return consume_heredoc();
560
+ switch (current_char()) {
561
+ case '_':
562
+ case '"':
563
+ case '`':
564
+ case '\'':
565
+ return consume_heredoc();
566
+ default:
567
+ return Token { Token::Type::LeftShift, m_file, m_token_line, m_token_column };
568
+ }
569
+ }
570
+ }
571
+ case '=':
572
+ advance();
573
+ switch (current_char()) {
574
+ case '>':
575
+ advance();
576
+ return Token { Token::Type::Comparison, m_file, m_token_line, m_token_column };
577
+ default:
578
+ return Token { Token::Type::LessThanOrEqual, m_file, m_token_line, m_token_column };
579
+ }
580
+ default:
581
+ return Token { Token::Type::LessThan, m_file, m_token_line, m_token_column };
582
+ }
583
+ case '>':
584
+ advance();
585
+ switch (current_char()) {
586
+ case '>':
587
+ advance();
588
+ switch (current_char()) {
589
+ case '=':
590
+ advance();
591
+ return Token { Token::Type::RightShiftEqual, m_file, m_token_line, m_token_column };
592
+ default:
593
+ return Token { Token::Type::RightShift, m_file, m_token_line, m_token_column };
594
+ }
595
+ case '=':
596
+ advance();
597
+ return Token { Token::Type::GreaterThanOrEqual, m_file, m_token_line, m_token_column };
598
+ default:
599
+ return Token { Token::Type::GreaterThan, m_file, m_token_line, m_token_column };
600
+ }
601
+ case '&':
602
+ advance();
603
+ switch (current_char()) {
604
+ case '&':
605
+ advance();
606
+ switch (current_char()) {
607
+ case '=':
608
+ advance();
609
+ return Token { Token::Type::AmpersandAmpersandEqual, m_file, m_token_line, m_token_column };
610
+ default:
611
+ return Token { Token::Type::AmpersandAmpersand, m_file, m_token_line, m_token_column };
612
+ }
613
+ case '=':
614
+ advance();
615
+ return Token { Token::Type::AmpersandEqual, m_file, m_token_line, m_token_column };
616
+ case '.':
617
+ advance();
618
+ return Token { Token::Type::SafeNavigation, m_file, m_token_line, m_token_column };
619
+ default:
620
+ return Token { Token::Type::Ampersand, m_file, m_token_line, m_token_column };
621
+ }
622
+ case '|':
623
+ advance();
624
+ switch (current_char()) {
625
+ case '|':
626
+ advance();
627
+ switch (current_char()) {
628
+ case '=':
629
+ advance();
630
+ return Token { Token::Type::PipePipeEqual, m_file, m_token_line, m_token_column };
631
+ default:
632
+ return Token { Token::Type::PipePipe, m_file, m_token_line, m_token_column };
633
+ }
634
+ case '=':
635
+ advance();
636
+ return Token { Token::Type::PipeEqual, m_file, m_token_line, m_token_column };
637
+ default:
638
+ return Token { Token::Type::Pipe, m_file, m_token_line, m_token_column };
639
+ }
640
+ case '^':
641
+ advance();
642
+ switch (current_char()) {
643
+ case '=':
644
+ advance();
645
+ return Token { Token::Type::CaretEqual, m_file, m_token_line, m_token_column };
646
+ default:
647
+ return Token { Token::Type::Caret, m_file, m_token_line, m_token_column };
648
+ }
649
+ case '~':
650
+ advance();
651
+ switch (current_char()) {
652
+ case '@':
653
+ if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
654
+ advance();
655
+ SharedPtr<String> lit = new String("~@");
656
+ return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column };
657
+ } else {
658
+ return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
659
+ }
660
+ default:
661
+ return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column };
662
+ }
663
+ case '?': {
664
+ auto c = next();
665
+ if (isspace(c)) {
666
+ m_open_ternary = true;
667
+ return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column };
668
+ } else {
669
+ advance();
670
+ if (c == '\\') {
671
+ auto buf = new String();
672
+ auto result = consume_escaped_byte(*buf);
673
+ if (!result.first)
674
+ return Token { result.second, current_char(), m_file, m_token_line, m_token_column };
675
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
676
+ } else {
677
+ return Token { Token::Type::String, c, m_file, m_token_line, m_token_column };
678
+ }
679
+ }
680
+ }
681
+ case ':': {
682
+ auto c = next();
683
+ if (c == ':') {
684
+ advance();
685
+ return Token { Token::Type::ConstantResolution, m_file, m_token_line, m_token_column };
686
+ } else if (m_last_token.type() == Token::Type::InterpolatedStringEnd && !m_whitespace_precedes && !m_open_ternary) {
687
+ return Token { Token::Type::InterpolatedStringSymbolKey, m_file, m_token_line, m_token_column };
688
+ } else if (c == '"') {
689
+ advance();
690
+ return consume_double_quoted_string('"', '"', Token::Type::InterpolatedSymbolBegin, Token::Type::InterpolatedSymbolEnd);
691
+ } else if (c == '\'') {
692
+ advance();
693
+ auto string = consume_single_quoted_string('\'', '\'');
694
+ return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column };
695
+ } else if (isspace(c)) {
696
+ m_open_ternary = false;
697
+ auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column };
698
+ token.set_whitespace_precedes(m_whitespace_precedes);
699
+ return token;
700
+ } else {
701
+ return consume_symbol();
702
+ }
703
+ }
704
+ case '@':
705
+ switch (peek()) {
706
+ case '@': {
707
+ // kinda janky, but we gotta trick consume_word and then prepend the '@' back on the front
708
+ advance();
709
+ auto token = consume_word(Token::Type::ClassVariable);
710
+ token.set_literal(String::format("@{}", token.literal()));
711
+ return token;
712
+ }
713
+ default:
714
+ return consume_word(Token::Type::InstanceVariable);
715
+ }
716
+ case '$':
717
+ if (peek() == '&') {
718
+ advance(2);
719
+ return Token { Token::Type::BackRef, '&', m_file, m_token_line, m_token_column };
720
+ } else if (peek() >= '1' && peek() <= '9') {
721
+ return consume_nth_ref();
722
+ } else {
723
+ return consume_global_variable();
724
+ }
725
+ case '.':
726
+ advance();
727
+ switch (current_char()) {
728
+ case '.':
729
+ advance();
730
+ switch (current_char()) {
731
+ case '.':
732
+ advance();
733
+ return Token { Token::Type::DotDotDot, m_file, m_token_line, m_token_column };
734
+ default:
735
+ return Token { Token::Type::DotDot, m_file, m_token_line, m_token_column };
736
+ }
737
+ default:
738
+ return Token { Token::Type::Dot, m_file, m_token_line, m_token_column };
739
+ }
740
+ case '{':
741
+ advance();
742
+ return Token { Token::Type::LCurlyBrace, m_file, m_token_line, m_token_column };
743
+ case '[': {
744
+ advance();
745
+ switch (current_char()) {
746
+ case ']':
747
+ advance();
748
+ switch (current_char()) {
749
+ case '=':
750
+ advance();
751
+ return Token { Token::Type::LBracketRBracketEqual, m_file, m_token_line, m_token_column };
752
+ default:
753
+ auto token = Token { Token::Type::LBracketRBracket, m_file, m_token_line, m_token_column };
754
+ token.set_whitespace_precedes(m_whitespace_precedes);
755
+ return token;
756
+ }
757
+ default:
758
+ auto token = Token { Token::Type::LBracket, m_file, m_token_line, m_token_column };
759
+ token.set_whitespace_precedes(m_whitespace_precedes);
760
+ return token;
761
+ }
762
+ }
763
+ case '(': {
764
+ advance();
765
+ auto token = Token { Token::Type::LParen, m_file, m_token_line, m_token_column };
766
+ token.set_whitespace_precedes(m_whitespace_precedes);
767
+ return token;
768
+ }
769
+ case '}':
770
+ advance();
771
+ return Token { Token::Type::RCurlyBrace, m_file, m_token_line, m_token_column };
772
+ case ']':
773
+ advance();
774
+ return Token { Token::Type::RBracket, m_file, m_token_line, m_token_column };
775
+ case ')':
776
+ advance();
777
+ return Token { Token::Type::RParen, m_file, m_token_line, m_token_column };
778
+ case '\n': {
779
+ advance();
780
+ auto token = Token { Token::Type::Newline, m_file, m_token_line, m_token_column };
781
+ if (!m_heredoc_stack.is_empty()) {
782
+ auto new_index = m_heredoc_stack.last();
783
+ while (m_index < new_index)
784
+ advance();
785
+ m_heredoc_stack.clear();
786
+ }
787
+ return token;
788
+ }
789
+ case ';':
790
+ advance();
791
+ return Token { Token::Type::Semicolon, m_file, m_token_line, m_token_column };
792
+ case ',':
793
+ advance();
794
+ return Token { Token::Type::Comma, m_file, m_token_line, m_token_column };
795
+ case '"':
796
+ advance();
797
+ return consume_double_quoted_string('"', '"');
798
+ case '\'':
799
+ advance();
800
+ return consume_single_quoted_string('\'', '\'');
801
+ case '`': {
802
+ advance();
803
+ return consume_double_quoted_string('`', '`', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
804
+ }
805
+ case '#':
806
+ if (token_is_first_on_line()) {
807
+ SharedPtr<String> doc = new String();
808
+ bool found_comment_marker = true;
809
+ char c = current_char();
810
+ while (c) {
811
+ if (!found_comment_marker) {
812
+ if (c == '#')
813
+ found_comment_marker = true;
814
+ else if (!isspace(c))
815
+ break;
816
+ }
817
+ if (c == '\n' || c == '\r') {
818
+ doc->append_char(c);
819
+ found_comment_marker = false;
820
+ } else if (found_comment_marker)
821
+ doc->append_char(c);
822
+ c = next();
823
+ }
824
+ return Token { Token::Type::Doc, doc, m_file, m_token_line, m_token_column };
825
+ } else {
826
+ char c;
827
+ do {
828
+ c = next();
829
+ } while (c && c != '\n' && c != '\r');
830
+ return Token { Token::Type::Comment, m_file, m_token_line, m_token_column };
831
+ }
832
+ case '0':
833
+ case '1':
834
+ case '2':
835
+ case '3':
836
+ case '4':
837
+ case '5':
838
+ case '6':
839
+ case '7':
840
+ case '8':
841
+ case '9': {
842
+ auto token = consume_numeric();
843
+ return token;
844
+ }
845
+ };
846
+
847
+ Token keyword_token;
848
+
849
+ if (!m_last_token.is_dot() && match(4, "self")) {
850
+ if (current_char() == '.')
851
+ keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
852
+ else
853
+ rewind(4);
854
+ }
855
+
856
+ if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
857
+ if (match(12, "__ENCODING__"))
858
+ keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column };
859
+ else if (match(8, "__LINE__"))
860
+ keyword_token = { Token::Type::LINEKeyword, m_file, m_token_line, m_token_column };
861
+ else if (match(8, "__FILE__"))
862
+ keyword_token = { Token::Type::FILEKeyword, m_file, m_token_line, m_token_column };
863
+ else if (match(5, "BEGIN"))
864
+ keyword_token = { Token::Type::BEGINKeyword, m_file, m_token_line, m_token_column };
865
+ else if (match(3, "END"))
866
+ keyword_token = { Token::Type::ENDKeyword, m_file, m_token_line, m_token_column };
867
+ else if (match(5, "alias"))
868
+ keyword_token = { Token::Type::AliasKeyword, m_file, m_token_line, m_token_column };
869
+ else if (match(3, "and"))
870
+ keyword_token = { Token::Type::AndKeyword, m_file, m_token_line, m_token_column };
871
+ else if (match(5, "begin"))
872
+ keyword_token = { Token::Type::BeginKeyword, m_file, m_token_line, m_token_column };
873
+ else if (match(5, "break"))
874
+ keyword_token = { Token::Type::BreakKeyword, m_file, m_token_line, m_token_column };
875
+ else if (match(4, "case"))
876
+ keyword_token = { Token::Type::CaseKeyword, m_file, m_token_line, m_token_column };
877
+ else if (match(5, "class"))
878
+ keyword_token = { Token::Type::ClassKeyword, m_file, m_token_line, m_token_column };
879
+ else if (match(8, "defined?"))
880
+ keyword_token = { Token::Type::DefinedKeyword, m_file, m_token_line, m_token_column };
881
+ else if (match(3, "def"))
882
+ keyword_token = { Token::Type::DefKeyword, m_file, m_token_line, m_token_column };
883
+ else if (match(2, "do"))
884
+ keyword_token = { Token::Type::DoKeyword, m_file, m_token_line, m_token_column };
885
+ else if (match(4, "else"))
886
+ keyword_token = { Token::Type::ElseKeyword, m_file, m_token_line, m_token_column };
887
+ else if (match(5, "elsif"))
888
+ keyword_token = { Token::Type::ElsifKeyword, m_file, m_token_line, m_token_column };
889
+ else if (match(3, "end"))
890
+ keyword_token = { Token::Type::EndKeyword, m_file, m_token_line, m_token_column };
891
+ else if (match(6, "ensure"))
892
+ keyword_token = { Token::Type::EnsureKeyword, m_file, m_token_line, m_token_column };
893
+ else if (match(5, "false"))
894
+ keyword_token = { Token::Type::FalseKeyword, m_file, m_token_line, m_token_column };
895
+ else if (match(3, "for"))
896
+ keyword_token = { Token::Type::ForKeyword, m_file, m_token_line, m_token_column };
897
+ else if (match(2, "if"))
898
+ keyword_token = { Token::Type::IfKeyword, m_file, m_token_line, m_token_column };
899
+ else if (match(2, "in"))
900
+ keyword_token = { Token::Type::InKeyword, m_file, m_token_line, m_token_column };
901
+ else if (match(6, "module"))
902
+ keyword_token = { Token::Type::ModuleKeyword, m_file, m_token_line, m_token_column };
903
+ else if (match(4, "next"))
904
+ keyword_token = { Token::Type::NextKeyword, m_file, m_token_line, m_token_column };
905
+ else if (match(3, "nil"))
906
+ keyword_token = { Token::Type::NilKeyword, m_file, m_token_line, m_token_column };
907
+ else if (match(3, "not"))
908
+ keyword_token = { Token::Type::NotKeyword, m_file, m_token_line, m_token_column };
909
+ else if (match(2, "or"))
910
+ keyword_token = { Token::Type::OrKeyword, m_file, m_token_line, m_token_column };
911
+ else if (match(4, "redo"))
912
+ keyword_token = { Token::Type::RedoKeyword, m_file, m_token_line, m_token_column };
913
+ else if (match(6, "rescue"))
914
+ keyword_token = { Token::Type::RescueKeyword, m_file, m_token_line, m_token_column };
915
+ else if (match(5, "retry"))
916
+ keyword_token = { Token::Type::RetryKeyword, m_file, m_token_line, m_token_column };
917
+ else if (match(6, "return"))
918
+ keyword_token = { Token::Type::ReturnKeyword, m_file, m_token_line, m_token_column };
919
+ else if (match(4, "self"))
920
+ keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column };
921
+ else if (match(5, "super"))
922
+ keyword_token = { Token::Type::SuperKeyword, m_file, m_token_line, m_token_column };
923
+ else if (match(4, "then"))
924
+ keyword_token = { Token::Type::ThenKeyword, m_file, m_token_line, m_token_column };
925
+ else if (match(4, "true"))
926
+ keyword_token = { Token::Type::TrueKeyword, m_file, m_token_line, m_token_column };
927
+ else if (match(5, "undef"))
928
+ keyword_token = { Token::Type::UndefKeyword, m_file, m_token_line, m_token_column };
929
+ else if (match(6, "unless"))
930
+ keyword_token = { Token::Type::UnlessKeyword, m_file, m_token_line, m_token_column };
931
+ else if (match(5, "until"))
932
+ keyword_token = { Token::Type::UntilKeyword, m_file, m_token_line, m_token_column };
933
+ else if (match(4, "when"))
934
+ keyword_token = { Token::Type::WhenKeyword, m_file, m_token_line, m_token_column };
935
+ else if (match(5, "while"))
936
+ keyword_token = { Token::Type::WhileKeyword, m_file, m_token_line, m_token_column };
937
+ else if (match(5, "yield"))
938
+ keyword_token = { Token::Type::YieldKeyword, m_file, m_token_line, m_token_column };
939
+ }
940
+
941
+ // if a colon comes next, it's not a keyword -- it's a symbol!
942
+ if (keyword_token && current_char() == ':' && peek() != ':' && !m_open_ternary) {
943
+ advance(); // :
944
+ auto name = keyword_token.type_value();
945
+ return Token { Token::Type::SymbolKey, name, m_file, m_token_line, m_token_column };
946
+ } else if (keyword_token) {
947
+ return keyword_token;
948
+ }
949
+
950
+ auto c = current_char();
951
+ if ((c >= 'a' && c <= 'z') || c == '_') {
952
+ return consume_bare_name();
953
+ } else if (c >= 'A' && c <= 'Z') {
954
+ return consume_constant();
955
+ } else {
956
+ auto buf = consume_non_whitespace();
957
+ auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column };
958
+ return token;
959
+ }
960
+
961
+ TM_UNREACHABLE();
962
+ }
963
+
964
// Lexes the name of a symbol, e.g. the `foo`, `+`, or `<=>` part of `:foo`,
// `:+`, `:<=>`. It appears the leading ':' has already been consumed by the
// caller (the main token switch) -- TODO confirm.
// Handles operator symbols, ivar/cvar symbols (:@a, :@@a), gvar symbols (:$a),
// and plain word symbols with an optional trailing '?', '!', or '='.
// Returns a Symbol token, or Invalid for malformed input (e.g. a lone `:=`).
Token Lexer::consume_symbol() {
    char c = current_char();
    SharedPtr<String> buf = new String("");
    // append the given char to the buffer and return the next input char
    auto gobble = [&buf, this](char c) -> char { buf->append_char(c); return next(); };
    switch (c) {
    case '@':
        // instance variable symbol :@foo or class variable symbol :@@foo
        c = gobble(c);
        if (c == '@') c = gobble(c);
        do {
            c = gobble(c);
        } while (is_identifier_char(c));
        break;
    case '$':
        // global variable symbol :$foo
        c = gobble(c);
        do {
            c = gobble(c);
        } while (is_identifier_char(c));
        break;
    case '~':
        // NOTE(review): a trailing '@' (:~@) is consumed but NOT kept in the
        // buffer, unlike :+@/:-@ below -- presumably because ~@ and ~ are the
        // same method; confirm this is intentional.
        c = gobble(c);
        if (c == '@') advance();
        break;
    case '+':
    case '-': {
        // :+ / :- vs. the distinct unary methods :+@ / :-@
        c = gobble(c);
        if (c == '@') gobble(c);
        break;
    }
    case '&':
    case '|':
    case '^':
    case '%':
    case '/': {
        // single-character operator symbols
        gobble(c);
        break;
    }
    case '*':
        // :* or :**
        c = gobble(c);
        if (c == '*')
            gobble(c);
        break;
    case '=':
        // only :==, :===, and :=~ are valid; a bare := is not a symbol
        switch (peek()) {
        case '=':
            c = gobble(c);
            c = gobble(c);
            if (c == '=') gobble(c);
            break;
        case '~':
            c = gobble(c);
            gobble(c);
            break;
        default:
            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
        }
        break;
    case '!':
        // :!, :!=, :!~, :!@
        c = gobble(c);
        switch (c) {
        case '=':
        case '~':
        case '@':
            gobble(c);
            // intentional fallthrough to break
        default:
            break;
        }
        break;
    case '>':
        // :>, :>=, :>>
        c = gobble(c);
        switch (c) {
        case '=':
        case '>':
            gobble(c);
            // intentional fallthrough to break
        default:
            break;
        }
        break;
    case '<':
        // :<, :<=, :<=>, :<<
        c = gobble(c);
        switch (c) {
        case '=':
            c = gobble(c);
            if (c == '>') gobble(c);
            break;
        case '<':
            gobble(c);
            // intentional fallthrough to break
        default:
            break;
        }
        break;
    case '[':
        // :[] or :[]= ; a lone '[' is not a valid symbol
        if (peek() == ']') {
            c = gobble(c);
            c = gobble(c);
            if (c == '=') gobble(c);
        } else {
            return Token { Token::Type::Invalid, c, m_file, m_token_line, m_token_column };
        }
        break;
    default:
        // word symbol: identifier chars, then an optional trailing ?, !, or =
        do {
            c = gobble(c);
        } while (is_identifier_char(c));
        switch (c) {
        case '?':
        case '!':
        case '=':
            switch (peek()) {
            case '>':
                // don't swallow the '=' of a hash rocket, e.g. `:foo=>1`
                break;
            default:
                gobble(c);
            }
        default:
            break;
        }
    }
    return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column };
}
1083
+
1084
+ Token Lexer::consume_word(Token::Type type) {
1085
+ char c = current_char();
1086
+ SharedPtr<String> buf = new String("");
1087
+ do {
1088
+ buf->append_char(c);
1089
+ c = next();
1090
+ } while (is_identifier_char(c));
1091
+ switch (c) {
1092
+ case '?':
1093
+ case '!':
1094
+ advance();
1095
+ buf->append_char(c);
1096
+ break;
1097
+ default:
1098
+ break;
1099
+ }
1100
+ return Token { type, buf, m_file, m_token_line, m_token_column };
1101
+ }
1102
+
1103
+ Token Lexer::consume_bare_name() {
1104
+ auto token = consume_word(Token::Type::BareName);
1105
+ auto c = current_char();
1106
+ if (c == ':' && peek() != ':' && m_last_token.can_precede_symbol_key()) {
1107
+ advance();
1108
+ token.set_type(Token::Type::SymbolKey);
1109
+ }
1110
+ return token;
1111
+ }
1112
+
1113
+ Token Lexer::consume_constant() {
1114
+ auto token = consume_word(Token::Type::Constant);
1115
+ auto c = current_char();
1116
+ if (c == ':' && peek() != ':' && m_last_token.can_precede_symbol_key()) {
1117
+ advance();
1118
+ token.set_type(Token::Type::SymbolKey);
1119
+ }
1120
+ return token;
1121
+ }
1122
+
1123
+ Token Lexer::consume_global_variable() {
1124
+ switch (peek()) {
1125
+ case '?':
1126
+ case '!':
1127
+ case '=':
1128
+ case '@':
1129
+ case '&':
1130
+ case '`':
1131
+ case '\'':
1132
+ case '"':
1133
+ case '+':
1134
+ case '/':
1135
+ case '\\':
1136
+ case ';':
1137
+ case '<':
1138
+ case '>':
1139
+ case '$':
1140
+ case '*':
1141
+ case '.':
1142
+ case ',':
1143
+ case ':':
1144
+ case '_':
1145
+ case '~': {
1146
+ advance();
1147
+ SharedPtr<String> buf = new String("$");
1148
+ buf->append_char(current_char());
1149
+ advance();
1150
+ return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
1151
+ }
1152
+ case '-': {
1153
+ SharedPtr<String> buf = new String("$-");
1154
+ advance(2);
1155
+ buf->append_char(current_char());
1156
+ advance();
1157
+ return Token { Token::Type::GlobalVariable, buf, m_file, m_token_line, m_token_column };
1158
+ }
1159
+ default: {
1160
+ return consume_word(Token::Type::GlobalVariable);
1161
+ }
1162
+ }
1163
+ }
1164
+
1165
+ bool is_valid_heredoc(bool with_dash, SharedPtr<String> doc, String heredoc_name) {
1166
+ if (!doc->ends_with(heredoc_name))
1167
+ return false;
1168
+ if (doc->length() - heredoc_name.length() == 0)
1169
+ return true;
1170
+ auto prefix = (*doc)[doc->length() - heredoc_name.length() - 1];
1171
+ return with_dash ? isspace(prefix) : prefix == '\n';
1172
+ }
1173
+
1174
+ size_t get_heredoc_indent(SharedPtr<String> doc) {
1175
+ if (doc->is_empty())
1176
+ return 0;
1177
+ size_t heredoc_indent = std::numeric_limits<size_t>::max();
1178
+ size_t line_indent = 0;
1179
+ bool maybe_blank_line = true;
1180
+ for (size_t i = 0; i < doc->length(); i++) {
1181
+ char c = (*doc)[i];
1182
+ if (c == '\n') {
1183
+ if (!maybe_blank_line && line_indent < heredoc_indent)
1184
+ heredoc_indent = line_indent;
1185
+ line_indent = 0;
1186
+ maybe_blank_line = true;
1187
+ } else if (isspace(c)) {
1188
+ if (maybe_blank_line)
1189
+ line_indent++;
1190
+ } else {
1191
+ maybe_blank_line = false;
1192
+ }
1193
+ }
1194
+ return heredoc_indent;
1195
+ }
1196
+
1197
// Strips the common leading indentation (per get_heredoc_indent) from every
// line of a squiggly (<<~) heredoc body, replacing `doc` with the result.
// Lines shorter than the indent (blank lines) collapse to a bare '\n'.
// NOTE(review): any trailing text after the final '\n' is dropped; callers
// appear to always leave the doc newline-terminated -- confirm.
void dedent_heredoc(SharedPtr<String> &doc) {
    size_t heredoc_indent = get_heredoc_indent(doc);
    if (heredoc_indent == 0)
        return; // nothing to strip
    SharedPtr<String> new_doc = new String("");
    size_t line_begin = 0;
    for (size_t i = 0; i < doc->length(); i++) {
        char c = (*doc)[i];
        if (c == '\n') {
            // skip past the indent, then copy the remainder of the line
            line_begin += heredoc_indent;
            if (line_begin < i)
                new_doc->append(doc->substring(line_begin, i - line_begin));
            new_doc->append_char('\n');
            line_begin = i + 1;
        }
    }
    doc = new_doc;
}
1215
+
1216
// Lexes a heredoc. On entry the "<<" has been consumed; an optional '-'
// (indentable delimiter) or '~' (indentable + dedented body) may follow,
// then the delimiter name, optionally quoted with ", ', or ` to control
// interpolation/shell semantics.
// The body text is scanned ahead of the normal cursor (via heredoc_index);
// the end-of-body position is pushed on m_heredoc_stack so the main lexer
// can finish the current line and then jump past the heredoc.
Token Lexer::consume_heredoc() {
    bool with_dash = false;
    bool should_dedent = false;
    switch (current_char()) {
    case '-': // <<- : terminator may be indented
        advance();
        with_dash = true;
        break;
    case '~': // <<~ : terminator may be indented AND body gets dedented
        advance();
        with_dash = true;
        should_dedent = true;
        break;
    }

    auto begin_type = Token::Type::InterpolatedStringBegin;
    auto end_type = Token::Type::InterpolatedStringEnd;
    bool should_interpolate = true;
    char delimiter = 0;
    String heredoc_name = "";
    switch (current_char()) {
    case '"': // <<"EOF" : interpolated string (same as bare <<EOF)
        delimiter = '"';
        break;
    case '`': // <<`EOF` : shell command string
        begin_type = Token::Type::InterpolatedShellBegin;
        end_type = Token::Type::InterpolatedShellEnd;
        delimiter = '`';
        break;
    case '\'': // <<'EOF' : raw string, no interpolation
        should_interpolate = false;
        delimiter = '\'';
        break;
    default:
        delimiter = 0;
    }

    if (delimiter) {
        // quoted delimiter: read up to the closing quote; it must not span lines
        char c = next();
        while (c != delimiter) {
            switch (c) {
            case '\n':
            case '\r':
            case 0:
                return Token { Token::Type::UnterminatedString, "heredoc identifier", m_file, m_token_line, m_token_column };
            default:
                heredoc_name.append_char(c);
                c = next();
            }
        }
        advance();
    } else {
        // bare delimiter: a plain word
        heredoc_name = String(consume_word(Token::Type::BareName).literal());
    }

    SharedPtr<String> doc = new String("");
    size_t heredoc_index = m_index;
    // peek at the input at heredoc_index without moving the real cursor
    auto get_char = [&heredoc_index, this]() { return (heredoc_index >= m_size) ? 0 : m_input->at(heredoc_index); };

    if (m_heredoc_stack.is_empty()) {
        // start consuming the heredoc on the next line
        while (get_char() != '\n') {
            if (heredoc_index >= m_size)
                return Token { Token::Type::UnterminatedString, "heredoc", m_file, m_token_line, m_token_column };
            heredoc_index++;
        }
        heredoc_index++;
    } else {
        // start consuming the heredoc right after the last one
        heredoc_index = m_heredoc_stack.last();
    }

    // consume the heredoc until we find the delimiter, either '\n' (if << was used) or any whitespace (if <<- was used) followed by "DELIM\n"
    for (;;) {
        if (heredoc_index >= m_size) {
            // end of input: acceptable only if the delimiter is the last line
            if (is_valid_heredoc(with_dash, doc, heredoc_name))
                break;
            return Token { Token::Type::UnterminatedString, doc, m_file, m_token_line, m_token_column };
        }
        char c = get_char();
        heredoc_index++;
        if (c == '\n' && is_valid_heredoc(with_dash, doc, heredoc_name))
            break;
        doc->append_char(c);
    }

    // chop the delimiter and any trailing space off the string
    doc->truncate(doc->length() - heredoc_name.length());
    doc->strip_trailing_spaces();

    if (should_dedent)
        dedent_heredoc(doc);

    // We have to keep tokenizing on the line where the heredoc was started, and then jump to the line after the heredoc.
    // This index is used to jump to the end of the heredoc later.
    m_heredoc_stack.push(heredoc_index);

    auto token = Token { Token::Type::String, doc, m_file, m_token_line, m_token_column };

    if (should_interpolate) {
        // hand the raw body to a nested lexer which emits the interpolation pieces
        m_nested_lexer = new InterpolatedStringLexer { *this, token, end_type };
        return Token { begin_type, m_file, m_token_line, m_token_column };
    }

    return token;
}
1322
+
1323
+ Token Lexer::consume_numeric() {
1324
+ SharedPtr<String> chars = new String;
1325
+ if (current_char() == '0') {
1326
+ switch (peek()) {
1327
+ case 'd':
1328
+ case 'D': {
1329
+ advance();
1330
+ char c = next();
1331
+ if (!isdigit(c))
1332
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1333
+ do {
1334
+ chars->append_char(c);
1335
+ c = next();
1336
+ if (c == '_')
1337
+ c = next();
1338
+ } while (isdigit(c));
1339
+ return chars_to_fixnum_or_bignum_token(chars, 10, 0);
1340
+ }
1341
+ case 'o':
1342
+ case 'O': {
1343
+ chars->append_char('0');
1344
+ chars->append_char('o');
1345
+ advance();
1346
+ char c = next();
1347
+ if (!(c >= '0' && c <= '7'))
1348
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1349
+ do {
1350
+ chars->append_char(c);
1351
+ c = next();
1352
+ if (c == '_')
1353
+ c = next();
1354
+ } while (c >= '0' && c <= '7');
1355
+ return chars_to_fixnum_or_bignum_token(chars, 8, 2);
1356
+ }
1357
+ case 'x':
1358
+ case 'X': {
1359
+ chars->append_char('0');
1360
+ chars->append_char('x');
1361
+ advance();
1362
+ char c = next();
1363
+ if (!isxdigit(c))
1364
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1365
+ do {
1366
+ chars->append_char(c);
1367
+ c = next();
1368
+ if (c == '_')
1369
+ c = next();
1370
+ } while (isxdigit(c));
1371
+ return chars_to_fixnum_or_bignum_token(chars, 16, 2);
1372
+ }
1373
+ case 'b':
1374
+ case 'B': {
1375
+ chars->append_char('0');
1376
+ chars->append_char('b');
1377
+ advance();
1378
+ char c = next();
1379
+ if (c != '0' && c != '1')
1380
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1381
+ do {
1382
+ chars->append_char(c);
1383
+ c = next();
1384
+ if (c == '_')
1385
+ c = next();
1386
+ } while (c == '0' || c == '1');
1387
+ return chars_to_fixnum_or_bignum_token(chars, 2, 2);
1388
+ }
1389
+ }
1390
+ }
1391
+ char c = current_char();
1392
+ do {
1393
+ chars->append_char(c);
1394
+ c = next();
1395
+ if (c == '_')
1396
+ c = next();
1397
+ } while (isdigit(c));
1398
+ if ((c == '.' && isdigit(peek())) || (c == 'e' || c == 'E'))
1399
+ return consume_numeric_as_float(chars);
1400
+ else
1401
+ return chars_to_fixnum_or_bignum_token(chars, 10, 0);
1402
+ }
1403
+
1404
+ const long long max_fixnum = std::numeric_limits<long long>::max() / 2; // 63 bits for MRI
1405
+
1406
+ Token Lexer::chars_to_fixnum_or_bignum_token(SharedPtr<String> chars, int base, int offset) {
1407
+ errno = 0;
1408
+ auto fixnum = strtoll(chars->c_str() + offset, nullptr, base);
1409
+ if (errno != 0 || fixnum > max_fixnum)
1410
+ return Token { Token::Type::Bignum, chars, m_file, m_token_line, m_token_column };
1411
+ else
1412
+ return Token { Token::Type::Fixnum, fixnum, m_file, m_token_line, m_token_column };
1413
+ }
1414
+
1415
+ Token Lexer::consume_numeric_as_float(SharedPtr<String> chars) {
1416
+ char c = current_char();
1417
+ if (c == '.') {
1418
+ chars->append_char('.');
1419
+ c = next();
1420
+ do {
1421
+ chars->append_char(c);
1422
+ c = next();
1423
+ if (c == '_')
1424
+ c = next();
1425
+ } while (isdigit(c));
1426
+ }
1427
+ if (c == 'e' || c == 'E') {
1428
+ chars->append_char('e');
1429
+ c = next();
1430
+ if (c == '-' || c == '+') {
1431
+ chars->append_char(c);
1432
+ c = next();
1433
+ }
1434
+ if (!isdigit(c))
1435
+ return Token { Token::Type::Invalid, c, m_file, m_cursor_line, m_cursor_column };
1436
+ do {
1437
+ chars->append_char(c);
1438
+ c = next();
1439
+ if (c == '_')
1440
+ c = next();
1441
+ } while (isdigit(c));
1442
+ }
1443
+ double dbl = atof(chars->c_str());
1444
+ return Token { Token::Type::Float, dbl, m_file, m_token_line, m_token_column };
1445
+ }
1446
+
1447
+ Token Lexer::consume_nth_ref() {
1448
+ char c = next();
1449
+ long long num = 0;
1450
+ do {
1451
+ num *= 10;
1452
+ num += c - '0';
1453
+ c = next();
1454
+ } while (isdigit(c));
1455
+ return Token { Token::Type::NthRef, num, m_file, m_token_line, m_token_column };
1456
+ }
1457
+
1458
+ long long Lexer::consume_hex_number(int max_length, bool allow_underscore) {
1459
+ char c = current_char();
1460
+ int length = 0;
1461
+ long long number = 0;
1462
+ do {
1463
+ number *= 16;
1464
+ if (c >= 'a' && c <= 'f')
1465
+ number += c - 'a' + 10;
1466
+ else if (c >= 'A' && c <= 'F')
1467
+ number += c - 'A' + 10;
1468
+ else
1469
+ number += c - '0';
1470
+ c = next();
1471
+ if (allow_underscore && c == '_')
1472
+ c = next();
1473
+ } while (isxdigit(c) && (max_length == 0 || ++length < max_length));
1474
+ return number;
1475
+ }
1476
+
1477
+ long long Lexer::consume_octal_number(int max_length, bool allow_underscore) {
1478
+ char c = current_char();
1479
+ int length = 0;
1480
+ long long number = 0;
1481
+ do {
1482
+ number *= 8;
1483
+ number += c - '0';
1484
+ c = next();
1485
+ if (allow_underscore && c == '_')
1486
+ c = next();
1487
+ } while (c >= '0' && c <= '7' && (max_length == 0 || ++length < max_length));
1488
+ return number;
1489
+ }
1490
+
1491
// public domain
// https://gist.github.com/Miouyouyou/864130e8734afe3f806512b14022226f
// Appends the UTF-8 encoding (1-4 bytes) of the given codepoint to buf.
// NOTE(review): codepoints >= 0x200000 hit TM_UNREACHABLE and abort; the
// \u{...} escape parser can produce up to 6 hex digits (0xFFFFFF), so a
// pathological literal could trip this -- confirm whether that's acceptable.
void Lexer::utf32_codepoint_to_utf8(String &buf, long long codepoint) {
    if (codepoint < 0x80) {
        // 1 byte: plain ASCII
        buf.append_char(codepoint);
    } else if (codepoint < 0x800) { // 00000yyy yyxxxxxx
        // 2 bytes: 110yyyyy 10xxxxxx
        buf.append_char(0b11000000 | (codepoint >> 6));
        buf.append_char(0b10000000 | (codepoint & 0x3f));
    } else if (codepoint < 0x10000) { // zzzzyyyy yyxxxxxx
        // 3 bytes: 1110zzzz 10yyyyyy 10xxxxxx
        buf.append_char(0b11100000 | (codepoint >> 12));
        buf.append_char(0b10000000 | ((codepoint >> 6) & 0x3f));
        buf.append_char(0b10000000 | (codepoint & 0x3f));
    } else if (codepoint < 0x200000) { // 000uuuuu zzzzyyyy yyxxxxxx
        // 4 bytes: 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
        buf.append_char(0b11110000 | (codepoint >> 18));
        buf.append_char(0b10000000 | ((codepoint >> 12) & 0x3f));
        buf.append_char(0b10000000 | ((codepoint >> 6) & 0x3f));
        buf.append_char(0b10000000 | (codepoint & 0x3f));
    } else {
        TM_UNREACHABLE();
    }
}
1512
+
1513
// Decodes one backslash escape inside a double-quoted string (the backslash
// itself has been consumed; current char is the escape letter) and appends
// the resulting byte(s) to buf.
// Handles: octal (\123), hex (\xNN), unicode (\uNNNN and \u{... ...}),
// control (\cX / \C-X), meta (\M-X, combinable with control), the usual
// named escapes (\n, \t, ...), and a backslash-newline line continuation
// (which appends nothing).
// Returns { true, String } on success, or { false, <error token type> } for
// a malformed unicode or control/meta escape.
std::pair<bool, Token::Type> Lexer::consume_escaped_byte(String &buf) {
    // Computes the byte value of a control escape; `meta` is true when an
    // \M- prefix was already seen. Also accepts a nested "\M-" inside the
    // control escape (\c\M-x). Returns -1 for a malformed sequence.
    auto control_character = [&](bool meta) {
        char c = next();
        if (c == '-')
            c = next();
        int num = 0;
        if (!meta && c == '\\' && peek() == 'M') {
            advance(); // M
            c = next();
            if (c != '-')
                return -1;
            meta = true;
            c = next();
        }
        if (c == '?')
            num = 127; // ctrl-? is DEL
        else if (c >= ' ' && c <= '>')
            num = c - ' ';
        else if (c >= '@' && c <= '_')
            num = c - '@';
        else if (c >= '`' && c <= '~')
            num = c - '`';
        if (meta)
            return num + 128; // meta sets the high bit
        else
            return num;
    };
    auto c = current_char();
    if (c >= '0' && c <= '7') {
        // octal: 1-3 digits
        auto number = consume_octal_number(3);
        buf.append_char(number);
    } else if (c == 'x') {
        // hex: 1-2 digits
        advance();
        auto number = consume_hex_number(2);
        buf.append_char(number);
    } else if (c == 'u') {
        c = next();
        if (c == '{') {
            c = next();
            // unicode characters, space separated, 1-6 hex digits
            while (c != '}') {
                if (!isxdigit(c))
                    return { false, Token::Type::InvalidUnicodeEscape };
                auto codepoint = consume_hex_number(6);
                utf32_codepoint_to_utf8(buf, codepoint);
                c = current_char();
                while (c == ' ')
                    c = next();
            }
            if (c == '}')
                advance();
        } else {
            // unicode: 4 hex digits
            auto codepoint = consume_hex_number(4);
            utf32_codepoint_to_utf8(buf, codepoint);
        }
    } else {
        switch (c) {
        case 'a':
            buf.append_char('\a');
            break;
        case 'b':
            buf.append_char('\b');
            break;
        case 'c':
        case 'C': {
            // control escape, e.g. \cA == 0x01
            int num = control_character(false);
            if (num == -1)
                return { false, Token::Type::InvalidCharacterEscape };
            buf.append_char((unsigned char)num);
            break;
        }
        case 'e':
            buf.append_char('\e');
            break;
        case 'f':
            buf.append_char('\f');
            break;
        case 'M': {
            // meta escape \M-x (high bit set), possibly combined with \c/\C
            c = next();
            if (c != '-')
                return { false, Token::Type::InvalidCharacterEscape };
            c = next();
            int num = 0;
            if (c == '\\' && (peek() == 'c' || peek() == 'C')) {
                advance();
                num = control_character(true);
            } else {
                num = (int)c + 128;
            }
            buf.append_char((unsigned char)num);
            break;
        }
        case 'n':
            buf.append_char('\n');
            break;
        case 'r':
            buf.append_char('\r');
            break;
        case 's':
            buf.append_char((unsigned char)32); // \s is a space
            break;
        case 't':
            buf.append_char('\t');
            break;
        case 'v':
            buf.append_char('\v');
            break;
        case '\n':
            // escaped newline: line continuation, emits nothing
            break;
        default:
            // unknown escape: the character stands for itself (e.g. \" -> ")
            buf.append_char(c);
            break;
        }
        advance();
    }
    return { true, Token::Type::String };
}
1632
+
1633
+ bool Lexer::token_is_first_on_line() const {
1634
+ return !m_last_token || m_last_token.is_newline();
1635
+ }
1636
+
1637
+ Token Lexer::consume_double_quoted_string(char start_char, char stop_char, Token::Type begin_type, Token::Type end_type) {
1638
+ m_nested_lexer = new InterpolatedStringLexer { *this, start_char, stop_char, end_type };
1639
+ return Token { begin_type, start_char, m_file, m_token_line, m_token_column };
1640
+ }
1641
+
1642
+ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
1643
+ int pair_depth = 0;
1644
+ SharedPtr<String> buf = new String("");
1645
+ char c = current_char();
1646
+ while (c) {
1647
+ if (c == '\\') {
1648
+ c = next();
1649
+ if (c == stop_char || c == '\\') {
1650
+ buf->append_char(c);
1651
+ } else {
1652
+ buf->append_char('\\');
1653
+ buf->append_char(c);
1654
+ }
1655
+ } else if (c == start_char && start_char != stop_char) {
1656
+ pair_depth++;
1657
+ buf->append_char(c);
1658
+ } else if (c == stop_char) {
1659
+ if (pair_depth > 0) {
1660
+ pair_depth--;
1661
+ buf->append_char(c);
1662
+ } else {
1663
+ advance(); // '
1664
+ if (current_char() == ':' && !m_open_ternary) {
1665
+ advance(); // :
1666
+ return Token { Token::Type::SymbolKey, buf, m_file, m_token_line, m_token_column };
1667
+ } else {
1668
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
1669
+ }
1670
+ }
1671
+ } else {
1672
+ buf->append_char(c);
1673
+ }
1674
+ c = next();
1675
+ }
1676
+ return Token { Token::Type::UnterminatedString, start_char, m_file, m_token_line, m_token_column };
1677
+ }
1678
+
1679
+ Token Lexer::consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type) {
1680
+ m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, false };
1681
+ return Token { type, start_char, m_file, m_token_line, m_token_column };
1682
+ }
1683
+
1684
+ Token Lexer::consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type) {
1685
+ m_nested_lexer = new WordArrayLexer { *this, start_char, stop_char, true };
1686
+ return Token { type, start_char, m_file, m_token_line, m_token_column };
1687
+ }
1688
+
1689
+ Token Lexer::consume_regexp(char start_char, char stop_char) {
1690
+ m_nested_lexer = new RegexpLexer { *this, start_char, stop_char };
1691
+ return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column };
1692
+ }
1693
+
1694
+ SharedPtr<String> Lexer::consume_non_whitespace() {
1695
+ char c = current_char();
1696
+ SharedPtr<String> buf = new String("");
1697
+ do {
1698
+ buf->append_char(c);
1699
+ c = next();
1700
+ } while (c && c != ' ' && c != '\t' && c != '\n' && c != '\r');
1701
+ return buf;
1702
+ }
1703
+ };