gloss 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,31 @@
1
+ require "./lib/cr_ruby"
2
+ require "./cr_ast"
3
+ require "./rb_ast"
4
+ require "./parser"
5
+
6
# Ruby-facing wrapper around `parse_buffer(String)`.
#
# `self` is the Ruby receiver (unused). `buffer` is passed through
# `CrRuby.rb_str_to_str`, read as a C string and copied into a Crystal
# `String`; the parse result is handed back through `CrRuby.rb_str_new_cstr`.
def parse_buffer(self : CrRuby::VALUE, buffer : CrRuby::VALUE)
  # A local is required here because `pointerof` needs an addressable variable.
  ruby_string = CrRuby.rb_str_to_str(buffer)
  source = String.new(CrRuby.rb_string_value_cstr(pointerof(ruby_string)))

  CrRuby.rb_str_new_cstr(parse_buffer(source))
end
15
+
16
# Parses `buffer` with `Gloss::Parser` and returns the resulting tree,
# converted with `to_rb`, serialised as JSON.
#
# If parsing raises `Crystal::SyntaxException`, the backtrace is printed with
# `pp` and the exception's string form is returned instead of the JSON.
def parse_buffer(buffer : String)
  Gloss::Parser.parse(buffer).to_rb.to_json
rescue error : Crystal::SyntaxException
  pp error.backtrace
  error.to_s
end
25
+
26
# Entry point exported under the C symbol `Init_gloss`
# (presumably called by Ruby when the extension is loaded — confirm).
#
# Initialises the GC, runs `LibCrystalMain.__crystal_main` with no arguments,
# then defines the Ruby module `Gloss` and registers `parse_buffer` on it as a
# singleton method taking one argument.
fun init = Init_gloss
  GC.init
  LibCrystalMain.__crystal_main(0, Pointer(Pointer(UInt8)).null)

  gloss_module = CrRuby.rb_define_module("Gloss")
  CrRuby.rb_define_singleton_method(gloss_module, "parse_buffer", ->parse_buffer(CrRuby::VALUE, CrRuby::VALUE), 1)
end
@@ -0,0 +1,1186 @@
1
+ require "compiler/crystal/syntax/*"
2
+
3
# Defines `Crystal::Lexer#next_token`, the method that produces one token per call.
# NOTE(review): presumably this reopens the Lexer class loaded by the
# `compiler/crystal/syntax/*` require and replaces its `next_token` — confirm
# against the Crystal version this project builds with.
+ module Crystal
4
+ class Lexer
# Scans a single token beginning at the current character.
#
# Leading `#` comments are handled first (location pragma, doc comment,
# comment token, or skipped), pending macro-expansion pragmas are applied,
# and then the token is chosen by dispatching on `current_char`.
#
# Returns `@token`. Several branches return early (comment tokens, keywords,
# `as?`, `in`, the `__DIR__`-style magic constants, a bare `_`); those paths
# skip the regex-flag reset at the bottom of the method.
5
+ def next_token
6
+ reset_token
7
+
8
+ # Skip comments
9
+ while current_char == '#'
10
+ start = current_pos
11
+
12
+ # Check #<loc:...> pragma comment
13
+ if next_char_no_column_increment == '<' &&
14
+ next_char_no_column_increment == 'l' &&
15
+ next_char_no_column_increment == 'o' &&
16
+ next_char_no_column_increment == 'c' &&
17
+ next_char_no_column_increment == ':'
18
+ next_char_no_column_increment
19
+ consume_loc_pragma
20
+ start = current_pos
21
+ else
22
+ if @doc_enabled
23
+ consume_doc
24
+ elsif @comments_enabled
25
+ return consume_comment(start)
26
+ else
27
+ skip_comment
28
+ end
29
+ end
30
+ end
31
+
32
+ start = current_pos
33
+
34
+ # Fix location by `macro_expansion_pragmas`.
35
+ if me_pragmas = macro_expansion_pragmas
36
+ # It might happen that the current "start" already passed some
37
+ # location pragmas, so we must consume all of those. For example
38
+ # if one does `@{{...}}` inside a macro, "start" will be one less
39
+ # number than the pragma, and after consuming "@..." if we don't
40
+ # consume the pragmas generated by `{{...}}` we'll be in an
41
+ # incorrect location.
42
+ while me_pragmas.first_key?.try &.<=(start)
43
+ _, pragmas = me_pragmas.shift
44
+ pragmas.each &.run_pragma self
45
+ end
46
+ end
47
+
# If this is still true at the end of the method, @wants_regex is set to true
# and @slash_is_regex to false. The whitespace, backslash-newline, '\n' and
# ';' branches clear it; the "\r\n" branch does not.
48
+ reset_regex_flags = true
49
+
50
+ case current_char
51
+ when '\0'
52
+ @token.type = :EOF
53
+ when ' ', '\t'
54
+ consume_whitespace
55
+ reset_regex_flags = false
56
+ when '\\'
57
+ if next_char == '\n'
58
+ incr_line_number
59
+ @token.passed_backslash_newline = true
60
+ consume_whitespace
61
+ reset_regex_flags = false
62
+ else
63
+ unknown_token
64
+ end
65
+ when '\n'
66
+ @token.type = :NEWLINE
67
+ next_char
68
+ incr_line_number
69
+ reset_regex_flags = false
70
+ consume_newlines
71
+ when '\r'
72
+ if next_char == '\n'
73
+ next_char
74
+ @token.type = :NEWLINE
75
+ incr_line_number
76
+ consume_newlines
77
+ else
78
+ raise "expected '\\n' after '\\r'"
79
+ end
80
+ when '='
81
+ case next_char
82
+ when '='
83
+ case next_char
84
+ when '='
85
+ next_char :"==="
86
+ else
87
+ @token.type = :"=="
88
+ end
89
+ when '>'
90
+ next_char :"=>"
91
+ when '~'
92
+ next_char :"=~"
93
+ else
94
+ @token.type = :"="
95
+ end
96
+ when '!'
97
+ case next_char
98
+ when '='
99
+ next_char :"!="
100
+ when '~'
101
+ next_char :"!~"
102
+ else
103
+ @token.type = :"!"
104
+ end
105
+ when '<'
106
+ case next_char
107
+ when '='
108
+ case next_char
109
+ when '>'
110
+ next_char :"<=>"
111
+ else
112
+ @token.type = :"<="
113
+ end
114
+ when '<'
115
+ case next_char
116
+ when '='
117
+ next_char :"<<="
# `<<-` starts a heredoc: read its identifier, optionally wrapped in single
# quotes. A quoted identifier turns escapes off (allow_escapes: !has_single_quote).
118
+ when '-'
119
+ has_single_quote = false
120
+ found_closing_single_quote = false
121
+
122
+ char = next_char
123
+ start_here = current_pos
124
+
125
+ if char == '\''
126
+ has_single_quote = true
127
+ char = next_char
128
+ start_here = current_pos
129
+ end
130
+
131
+ unless ident_part?(char)
132
+ raise "heredoc identifier starts with invalid character"
133
+ end
134
+
135
+ end_here = 0
136
+
137
+ while true
138
+ char = next_char
139
+ case
140
+ when char == '\r'
141
+ if peek_next_char == '\n'
142
+ end_here = current_pos
143
+ next_char
144
+ break
145
+ else
146
+ raise "expecting '\\n' after '\\r'"
147
+ end
148
+ when char == '\n'
149
+ end_here = current_pos
150
+ break
151
+ when ident_part?(char)
152
+ # ok
153
+ when char == '\0'
154
+ raise "Unexpected EOF on heredoc identifier"
155
+ else
156
+ if char == '\'' && has_single_quote
157
+ found_closing_single_quote = true
158
+ end_here = current_pos
159
+ next_char
160
+ break
161
+ elsif has_single_quote
162
+ # wait until another quote
163
+ else
164
+ end_here = current_pos
165
+ break
166
+ end
167
+ end
168
+ end
169
+
170
+ if has_single_quote && !found_closing_single_quote
171
+ raise "expecting closing single quote"
172
+ end
173
+
174
+ here = string_range(start_here, end_here)
175
+
176
+ delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote, advance: false
177
+ else
178
+ @token.type = :"<<"
179
+ end
180
+ else
181
+ @token.type = :"<"
182
+ end
183
+ when '>'
184
+ case next_char
185
+ when '='
186
+ next_char :">="
187
+ when '>'
188
+ case next_char
189
+ when '='
190
+ next_char :">>="
191
+ else
192
+ @token.type = :">>"
193
+ end
194
+ else
195
+ @token.type = :">"
196
+ end
# A sign directly followed by a digit is handed to the number scanners, so the
# sign becomes part of the numeric literal; `++` and `--` are rejected.
197
+ when '+'
198
+ @token.start = start
199
+ case next_char
200
+ when '='
201
+ next_char :"+="
202
+ when '0'
203
+ scan_zero_number(start)
204
+ when '1', '2', '3', '4', '5', '6', '7', '8', '9'
205
+ scan_number(start)
206
+ when '+'
207
+ raise "postfix increment is not supported, use `exp += 1`"
208
+ else
209
+ @token.type = :"+"
210
+ end
211
+ when '-'
212
+ @token.start = start
213
+ case next_char
214
+ when '='
215
+ next_char :"-="
216
+ when '>'
217
+ next_char :"->"
218
+ when '0'
219
+ scan_zero_number start, negative: true
220
+ when '1', '2', '3', '4', '5', '6', '7', '8', '9'
221
+ scan_number start, negative: true
222
+ when '-'
223
+ raise "postfix decrement is not supported, use `exp -= 1`"
224
+ else
225
+ @token.type = :"-"
226
+ end
227
+ when '*'
228
+ case next_char
229
+ when '='
230
+ next_char :"*="
231
+ when '*'
232
+ case next_char
233
+ when '='
234
+ next_char :"**="
235
+ else
236
+ @token.type = :"**"
237
+ end
238
+ else
239
+ @token.type = :"*"
240
+ end
# `/` becomes an operator token (`/`, `//`, `/=`, `//=`) or the start of a
# regex literal (DELIMITER_START), depending on @wants_def_or_macro_name,
# @slash_is_regex, @wants_regex and the character that follows.
241
+ when '/'
242
+ line = @line_number
243
+ column = @column_number
244
+ char = next_char
245
+ if (@wants_def_or_macro_name || !@slash_is_regex) && char == '/'
246
+ case next_char
247
+ when '='
248
+ next_char :"//="
249
+ else
250
+ @token.type = :"//"
251
+ end
252
+ elsif !@slash_is_regex && char == '='
253
+ next_char :"/="
254
+ elsif @wants_def_or_macro_name
255
+ @token.type = :"/"
256
+ elsif @slash_is_regex
257
+ @token.type = :DELIMITER_START
258
+ @token.delimiter_state = Token::DelimiterState.new(:regex, '/', '/')
259
+ @token.raw = "/"
260
+ elsif char.ascii_whitespace? || char == '\0'
261
+ @token.type = :"/"
262
+ elsif @wants_regex
263
+ @token.type = :DELIMITER_START
264
+ @token.delimiter_state = Token::DelimiterState.new(:regex, '/', '/')
265
+ @token.raw = "/"
266
+ else
267
+ @token.type = :"/"
268
+ end
# `%` introduces percent literals (%(...), %i, %q, %Q, %r, %x, %w), `%=`,
# the `%}` token, or is the plain `%` operator.
269
+ when '%'
270
+ if @wants_def_or_macro_name
271
+ next_char :"%"
272
+ else
273
+ case next_char
274
+ when '='
275
+ next_char :"%="
276
+ when '(', '[', '{', '<', '|'
277
+ delimited_pair :string, current_char, closing_char, start
278
+ when 'i'
279
+ case peek_next_char
280
+ when '(', '{', '[', '<', '|'
281
+ start_char = next_char
282
+ next_char :SYMBOL_ARRAY_START
283
+ @token.raw = "%i#{start_char}" if @wants_raw
284
+ @token.delimiter_state = Token::DelimiterState.new(:symbol_array, start_char, closing_char(start_char))
285
+ else
286
+ @token.type = :"%"
287
+ end
288
+ when 'q'
289
+ case peek_next_char
290
+ when '(', '{', '[', '<', '|'
291
+ next_char
292
+ delimited_pair :string, current_char, closing_char, start, allow_escapes: false
293
+ else
294
+ @token.type = :"%"
295
+ end
296
+ when 'Q'
297
+ case peek_next_char
298
+ when '(', '{', '[', '<', '|'
299
+ next_char
300
+ delimited_pair :string, current_char, closing_char, start
301
+ else
302
+ @token.type = :"%"
303
+ end
304
+ when 'r'
305
+ case next_char
306
+ when '(', '[', '{', '<', '|'
307
+ delimited_pair :regex, current_char, closing_char, start
308
+ else
309
+ raise "unknown %r char"
310
+ end
311
+ when 'x'
312
+ case next_char
313
+ when '(', '[', '{', '<', '|'
314
+ delimited_pair :command, current_char, closing_char, start
315
+ else
316
+ raise "unknown %x char"
317
+ end
318
+ when 'w'
319
+ case peek_next_char
320
+ when '(', '{', '[', '<', '|'
321
+ start_char = next_char
322
+ next_char :STRING_ARRAY_START
323
+ @token.raw = "%w#{start_char}" if @wants_raw
324
+ @token.delimiter_state = Token::DelimiterState.new(:string_array, start_char, closing_char(start_char))
325
+ else
326
+ @token.type = :"%"
327
+ end
328
+ when '}'
329
+ next_char :"%}"
330
+ else
331
+ @token.type = :"%"
332
+ end
333
+ end
334
+ when '(' then next_char :"("
335
+ when ')' then next_char :")"
336
+ when '{'
337
+ char = next_char
338
+ case char
339
+ when '%'
340
+ next_char :"{%"
341
+ when '{'
342
+ next_char :"{{"
343
+ else
344
+ @token.type = :"{"
345
+ end
346
+ when '}' then next_char :"}"
347
+ when '['
348
+ case next_char
349
+ when ']'
350
+ case next_char
351
+ when '='
352
+ next_char :"[]="
353
+ when '?'
354
+ next_char :"[]?"
355
+ else
356
+ @token.type = :"[]"
357
+ end
358
+ else
359
+ @token.type = :"["
360
+ end
361
+ when ']' then next_char :"]"
362
+ when ',' then next_char :","
363
+ when '?' then next_char :"?"
364
+ when ';'
365
+ reset_regex_flags = false
366
+ next_char :";"
# With @wants_symbol set, `:` followed by a double-quoted string or an
# identifier is lexed as a SYMBOL token, and `:` followed by an operator goes
# through the `symbol` / `next_char_and_symbol` helpers (presumably also
# producing SYMBOL tokens — confirm). Without it only `::` and `:` are produced.
367
+ when ':'
368
+ char = next_char
369
+
370
+ if @wants_symbol
371
+ case char
372
+ when ':'
373
+ next_char :"::"
374
+ when '+'
375
+ next_char_and_symbol "+"
376
+ when '-'
377
+ next_char_and_symbol "-"
378
+ when '*'
379
+ if next_char == '*'
380
+ next_char_and_symbol "**"
381
+ else
382
+ symbol "*"
383
+ end
384
+ when '/'
385
+ case next_char
386
+ when '/'
387
+ next_char_and_symbol "//"
388
+ else
389
+ symbol "/"
390
+ end
391
+ when '='
392
+ case next_char
393
+ when '='
394
+ if next_char == '='
395
+ next_char_and_symbol "==="
396
+ else
397
+ symbol "=="
398
+ end
399
+ when '~'
400
+ next_char_and_symbol "=~"
401
+ else
402
+ unknown_token
403
+ end
404
+ when '!'
405
+ case next_char
406
+ when '='
407
+ next_char_and_symbol "!="
408
+ when '~'
409
+ next_char_and_symbol "!~"
410
+ else
411
+ symbol "!"
412
+ end
413
+ when '<'
414
+ case next_char
415
+ when '='
416
+ if next_char == '>'
417
+ next_char_and_symbol "<=>"
418
+ else
419
+ symbol "<="
420
+ end
421
+ when '<'
422
+ next_char_and_symbol "<<"
423
+ else
424
+ symbol "<"
425
+ end
426
+ when '>'
427
+ case next_char
428
+ when '='
429
+ next_char_and_symbol ">="
430
+ when '>'
431
+ next_char_and_symbol ">>"
432
+ else
433
+ symbol ">"
434
+ end
435
+ when '&'
436
+ case next_char
437
+ when '+'
438
+ next_char_and_symbol "&+"
439
+ when '-'
440
+ next_char_and_symbol "&-"
441
+ when '*'
442
+ case next_char
443
+ when '*'
444
+ next_char_and_symbol "&**"
445
+ else
446
+ symbol "&*"
447
+ end
448
+ else
449
+ symbol "&"
450
+ end
451
+ when '|'
452
+ next_char_and_symbol "|"
453
+ when '^'
454
+ next_char_and_symbol "^"
455
+ when '~'
456
+ next_char_and_symbol "~"
457
+ when '%'
458
+ next_char_and_symbol "%"
459
+ when '['
460
+ if next_char == ']'
461
+ case next_char
462
+ when '='
463
+ next_char_and_symbol "[]="
464
+ when '?'
465
+ next_char_and_symbol "[]?"
466
+ else
467
+ symbol "[]"
468
+ end
469
+ else
470
+ unknown_token
471
+ end
# Quoted symbol (`:"..."`): escape sequences are decoded into `io`, whose
# contents become the token value.
472
+ when '"'
473
+ line = @line_number
474
+ column = @column_number
475
+ start = current_pos + 1
476
+ io = IO::Memory.new
477
+ while true
478
+ char = next_char
479
+ case char
480
+ when '\\'
481
+ case char = next_char
482
+ when 'a'
483
+ io << '\a'
484
+ when 'b'
485
+ io << '\b'
486
+ when 'n'
487
+ io << '\n'
488
+ when 'r'
489
+ io << '\r'
490
+ when 't'
491
+ io << '\t'
492
+ when 'v'
493
+ io << '\v'
494
+ when 'f'
495
+ io << '\f'
496
+ when 'e'
497
+ io << '\e'
498
+ when 'x'
499
+ io.write_byte consume_string_hex_escape
500
+ when 'u'
501
+ io << consume_string_unicode_escape
502
+ when '0', '1', '2', '3', '4', '5', '6', '7'
503
+ io.write_byte consume_octal_escape(char)
504
+ when '\n'
505
+ incr_line_number nil
506
+ io << '\n'
507
+ when '\0'
508
+ raise "unterminated quoted symbol", line, column
509
+ else
510
+ io << char
511
+ end
512
+ when '"'
513
+ break
514
+ when '\0'
515
+ raise "unterminated quoted symbol", line, column
516
+ else
517
+ io << char
518
+ end
519
+ end
520
+
521
+ @token.type = :SYMBOL
522
+ @token.value = io.to_s
523
+ next_char
524
+ set_token_raw_from_start(start - 2)
525
+ else
526
+ if ident_start?(char)
527
+ start = current_pos
528
+ while ident_part?(next_char)
529
+ # Nothing to do
530
+ end
531
+ if current_char == '?' || ((current_char == '!' || current_char == '=') && peek_next_char != '=')
532
+ next_char
533
+ end
534
+ @token.type = :SYMBOL
535
+ @token.value = string_range_from_pool(start)
536
+ set_token_raw_from_start(start - 1)
537
+ else
538
+ @token.type = :":"
539
+ end
540
+ end
541
+ else
542
+ case char
543
+ when ':'
544
+ next_char :"::"
545
+ else
546
+ @token.type = :":"
547
+ end
548
+ end
549
+ when '~'
550
+ next_char :"~"
551
+ when '.'
552
+ case next_char
553
+ when '.'
554
+ case next_char
555
+ when '.'
556
+ next_char :"..."
557
+ else
558
+ @token.type = :".."
559
+ end
560
+ else
561
+ @token.type = :"."
562
+ end
563
+ when '&'
564
+ case next_char
565
+ when '&'
566
+ case next_char
567
+ when '='
568
+ next_char :"&&="
569
+ else
570
+ @token.type = :"&&"
571
+ end
572
+ when '='
573
+ next_char :"&="
574
+ when '+'
575
+ case next_char
576
+ when '='
577
+ next_char :"&+="
578
+ else
579
+ @token.type = :"&+"
580
+ end
581
+ when '-'
582
+ # Check if '>' comes after '&-', making it '&->'.
583
+ # We want to parse that like '&(->...)',
584
+ # so we only return '&' for now.
585
+ if peek_next_char == '>'
586
+ @token.type = :"&"
587
+ else
588
+ case next_char
589
+ when '='
590
+ next_char :"&-="
591
+ else
592
+ @token.type = :"&-"
593
+ end
594
+ end
595
+ when '*'
596
+ case next_char
597
+ when '*'
598
+ next_char :"&**"
599
+ when '='
600
+ next_char :"&*="
601
+ else
602
+ @token.type = :"&*"
603
+ end
604
+ else
605
+ @token.type = :"&"
606
+ end
607
+ when '|'
608
+ case next_char
609
+ when '|'
610
+ case next_char
611
+ when '='
612
+ next_char :"||="
613
+ else
614
+ @token.type = :"||"
615
+ end
616
+ when '='
617
+ next_char :"|="
618
+ else
619
+ @token.type = :"|"
620
+ end
621
+ when '^'
622
+ case next_char
623
+ when '='
624
+ next_char :"^="
625
+ else
626
+ @token.type = :"^"
627
+ end
# Quote characters open a delimited token: a backtick gives a :command
# delimiter, `"` and `'` give a :string delimiter. A backtick is a plain
# "`" token when @wants_def_or_macro_name is set.
628
+ when '"', '`', '\''
629
+ delimiter = current_char
630
+ if delimiter == '`' && @wants_def_or_macro_name
631
+ next_char :"`"
632
+ else
633
+ next_char
634
+ @token.type = :DELIMITER_START
635
+ @token.delimiter_state = Token::DelimiterState.new(delimiter == '`' ? :command : :string, delimiter, delimiter)
636
+ set_token_raw_from_start(start)
637
+ end
638
+ when '0'
639
+ scan_zero_number(start)
640
+ when '1', '2', '3', '4', '5', '6', '7', '8', '9'
641
+ scan_number current_pos
# `@[` token, or an instance variable (`@name`) / class variable (`@@name`).
642
+ when '@'
643
+ start = current_pos
644
+ case next_char
645
+ when '['
646
+ next_char :"@["
647
+ else
648
+ class_var = false
649
+ if current_char == '@'
650
+ class_var = true
651
+ next_char
652
+ end
653
+ if ident_start?(current_char)
654
+ while ident_part?(next_char)
655
+ # Nothing to do
656
+ end
657
+ @token.type = class_var ? :CLASS_VAR : :INSTANCE_VAR
658
+ @token.value = string_range_from_pool(start)
659
+ else
660
+ unknown_token
661
+ end
662
+ end
# `$~`, `$?`, `$<digits>` (GLOBAL_MATCH_DATA_INDEX, optionally followed by
# `?`) or `$name` (GLOBAL).
663
+ when '$'
664
+ start = current_pos
665
+ next_char
666
+ case current_char
667
+ when '~'
668
+ next_char
669
+ @token.type = :"$~"
670
+ when '?'
671
+ next_char
672
+ @token.type = :"$?"
673
+ when .ascii_number?
674
+ start = current_pos
675
+ char = next_char
676
+ if char == '0'
677
+ char = next_char
678
+ else
679
+ while char.ascii_number?
680
+ char = next_char
681
+ end
682
+ char = next_char if char == '?'
683
+ end
684
+ @token.type = :GLOBAL_MATCH_DATA_INDEX
685
+ @token.value = string_range_from_pool(start)
686
+ else
687
+ if ident_start?(current_char)
688
+ while ident_part?(next_char)
689
+ # Nothing to do
690
+ end
691
+ @token.type = :GLOBAL
692
+ @token.value = string_range_from_pool(start)
693
+ else
694
+ unknown_token
695
+ end
696
+ end
# Keyword recognition (branches 'a' through 'y' and '_'): each keyword is
# matched character by character. A full match returns through
# check_ident_or_keyword; any mismatch falls through to the
# `scan_ident(start)` call at the end of the branch.
697
+ when 'a'
698
+ case next_char
699
+ when 'b'
700
+ if next_char == 's' && next_char == 't' && next_char == 'r' && next_char == 'a' && next_char == 'c' && next_char == 't'
701
+ return check_ident_or_keyword(:abstract, start)
702
+ end
703
+ when 'l'
704
+ if next_char == 'i' && next_char == 'a' && next_char == 's'
705
+ return check_ident_or_keyword(:alias, start)
706
+ end
707
+ when 's'
708
+ peek = peek_next_char
709
+ case peek
710
+ when 'm'
711
+ next_char
712
+ return check_ident_or_keyword(:asm, start)
713
+ when '?'
714
+ next_char
715
+ next_char
716
+ @token.type = :IDENT
717
+ @token.value = :as?
718
+ return @token
719
+ else
720
+ return check_ident_or_keyword(:as, start)
721
+ end
722
+ when 'n'
723
+ case next_char
# NOTE(review): unlike every other keyword branch, this one does not return;
# control continues to `scan_ident(start)` below. Confirm that the :and type
# assigned here is not overwritten by that call.
724
+ when 'd'
725
+ @token.type = :and
726
+ when 'n'
727
+ if next_char == 'o' && next_char == 't' && next_char == 'a' && next_char == 't' && next_char == 'i' && next_char == 'o' && next_char == 'n'
728
+ return check_ident_or_keyword(:annotation, start)
729
+ end
730
+ end
731
+ else
732
+ # scan_ident
733
+ end
734
+ scan_ident(start)
735
+ when 'b'
736
+ case next_char
737
+ when 'e'
738
+ if next_char == 'g' && next_char == 'i' && next_char == 'n'
739
+ return check_ident_or_keyword(:begin, start)
740
+ end
741
+ when 'r'
742
+ if next_char == 'e' && next_char == 'a' && next_char == 'k'
743
+ return check_ident_or_keyword(:break, start)
744
+ end
745
+ else
746
+ # scan_ident
747
+ end
748
+ scan_ident(start)
749
+ when 'c'
750
+ case next_char
751
+ when 'a'
752
+ if next_char == 's' && next_char == 'e'
753
+ return check_ident_or_keyword(:case, start)
754
+ end
755
+ when 'l'
756
+ if next_char == 'a' && next_char == 's' && next_char == 's'
757
+ return check_ident_or_keyword(:class, start)
758
+ end
759
+ else
760
+ # scan_ident
761
+ end
762
+ scan_ident(start)
763
+ when 'd'
764
+ case next_char
765
+ when 'e'
766
+ if next_char == 'f'
767
+ return check_ident_or_keyword(:def, start)
768
+ end
769
+ when 'o' then return check_ident_or_keyword(:do, start)
770
+ else
771
+ # scan_ident
772
+ end
773
+ scan_ident(start)
774
+ when 'e'
775
+ case next_char
776
+ when 'l'
777
+ case next_char
778
+ when 's'
779
+ case next_char
780
+ when 'e' then return check_ident_or_keyword(:else, start)
781
+ when 'i'
782
+ if next_char == 'f'
783
+ return check_ident_or_keyword(:elsif, start)
784
+ end
785
+ else
786
+ # scan_ident
787
+ end
788
+ else
789
+ # scan_ident
790
+ end
791
+ when 'n'
792
+ case next_char
793
+ when 'd'
794
+ return check_ident_or_keyword(:end, start)
795
+ when 's'
796
+ if next_char == 'u' && next_char == 'r' && next_char == 'e'
797
+ return check_ident_or_keyword(:ensure, start)
798
+ end
799
+ when 'u'
800
+ if next_char == 'm'
801
+ return check_ident_or_keyword(:enum, start)
802
+ end
803
+ else
804
+ # scan_ident
805
+ end
806
+ when 'x'
807
+ if next_char == 't' && next_char == 'e' && next_char == 'n' && next_char == 'd'
808
+ return check_ident_or_keyword(:extend, start)
809
+ end
810
+ else
811
+ # scan_ident
812
+ end
813
+ scan_ident(start)
814
+ when 'f'
815
+ case next_char
816
+ when 'a'
817
+ if next_char == 'l' && next_char == 's' && next_char == 'e'
818
+ return check_ident_or_keyword(:false, start)
819
+ end
820
+ when 'o'
821
+ if next_char == 'r'
822
+ return check_ident_or_keyword(:for, start)
823
+ end
824
+ when 'u'
825
+ if next_char == 'n'
826
+ return check_ident_or_keyword(:fun, start)
827
+ end
828
+ else
829
+ # scan_ident
830
+ end
831
+ scan_ident(start)
832
+ when 'i'
833
+ case next_char
834
+ when 'f'
835
+ return check_ident_or_keyword(:if, start)
836
+ when 'n'
837
+ if ident_part_or_end?(peek_next_char)
838
+ case next_char
839
+ when 'c'
840
+ if next_char == 'l' && next_char == 'u' && next_char == 'd' && next_char == 'e'
841
+ return check_ident_or_keyword(:include, start)
842
+ end
843
+ when 's'
844
+ if next_char == 't' && next_char == 'a' && next_char == 'n' && next_char == 'c' && next_char == 'e' && next_char == '_' && next_char == 's' && next_char == 'i' && next_char == 'z' && next_char == 'e' && next_char == 'o' && next_char == 'f'
845
+ return check_ident_or_keyword(:instance_sizeof, start)
846
+ end
847
+ else
848
+ # scan_ident
849
+ end
850
+ else
851
+ next_char
852
+ @token.type = :IDENT
853
+ @token.value = :in
854
+ return @token
855
+ end
856
+ when 's'
857
+ if next_char == '_' && next_char == 'a' && next_char == '?'
858
+ return check_ident_or_keyword(:is_a?, start)
859
+ end
860
+ else
861
+ # scan_ident
862
+ end
863
+ scan_ident(start)
864
+ when 'l'
865
+ case next_char
866
+ when 'i'
867
+ if next_char == 'b'
868
+ return check_ident_or_keyword(:lib, start)
869
+ end
870
+ else
871
+ # scan_ident
872
+ end
873
+ scan_ident(start)
874
+ when 'm'
875
+ case next_char
876
+ when 'a'
877
+ if next_char == 'c' && next_char == 'r' && next_char == 'o'
878
+ return check_ident_or_keyword(:macro, start)
879
+ end
880
+ when 'o'
881
+ case next_char
882
+ when 'd'
883
+ if next_char == 'u' && next_char == 'l' && next_char == 'e'
884
+ return check_ident_or_keyword(:module, start)
885
+ end
886
+ else
887
+ # scan_ident
888
+ end
889
+ else
890
+ # scan_ident
891
+ end
892
+ scan_ident(start)
893
+ when 'n'
894
+ case next_char
895
+ when 'e'
896
+ if next_char == 'x' && next_char == 't'
897
+ return check_ident_or_keyword(:next, start)
898
+ end
899
+ when 'i'
900
+ case next_char
901
+ when 'l'
902
+ if peek_next_char == '?'
903
+ next_char
904
+ return check_ident_or_keyword(:nil?, start)
905
+ else
906
+ return check_ident_or_keyword(:nil, start)
907
+ end
908
+ else
909
+ # scan_ident
910
+ end
911
+ else
912
+ # scan_ident
913
+ end
914
+ scan_ident(start)
915
+ when 'o'
916
+ case next_char
917
+ when 'f'
918
+ if peek_next_char == 'f'
919
+ next_char
920
+ if next_char == 's' && next_char == 'e' && next_char == 't' && next_char == 'o' && next_char == 'f'
921
+ return check_ident_or_keyword(:offsetof, start)
922
+ end
923
+ else
924
+ return check_ident_or_keyword(:of, start)
925
+ end
926
+ when 'u'
927
+ if next_char == 't'
928
+ return check_ident_or_keyword(:out, start)
929
+ end
930
+ else
931
+ # scan_ident
932
+ end
933
+ scan_ident(start)
934
+ when 'p'
935
+ case next_char
936
+ when 'o'
937
+ if next_char == 'i' && next_char == 'n' && next_char == 't' && next_char == 'e' && next_char == 'r' && next_char == 'o' && next_char == 'f'
938
+ return check_ident_or_keyword(:pointerof, start)
939
+ end
940
+ when 'r'
941
+ case next_char
942
+ when 'i'
943
+ if next_char == 'v' && next_char == 'a' && next_char == 't' && next_char == 'e'
944
+ return check_ident_or_keyword(:private, start)
945
+ end
946
+ when 'o'
947
+ if next_char == 't' && next_char == 'e' && next_char == 'c' && next_char == 't' && next_char == 'e' && next_char == 'd'
948
+ return check_ident_or_keyword(:protected, start)
949
+ end
950
+ else
951
+ # scan_ident
952
+ end
953
+ else
954
+ # scan_ident
955
+ end
956
+ scan_ident(start)
957
+ when 'r'
958
+ case next_char
959
+ when 'e'
960
+ case next_char
961
+ when 's'
962
+ case next_char
963
+ when 'c'
964
+ if next_char == 'u' && next_char == 'e'
965
+ return check_ident_or_keyword(:rescue, start)
966
+ end
967
+ when 'p'
968
+ if next_char == 'o' && next_char == 'n' && next_char == 'd' && next_char == 's' && next_char == '_' && next_char == 't' && next_char == 'o' && next_char == '?'
969
+ return check_ident_or_keyword(:responds_to?, start)
970
+ end
971
+ else
972
+ # scan_ident
973
+ end
974
+ when 't'
975
+ if next_char == 'u' && next_char == 'r' && next_char == 'n'
976
+ return check_ident_or_keyword(:return, start)
977
+ end
978
+ when 'q'
979
+ if next_char == 'u' && next_char == 'i' && next_char == 'r' && next_char == 'e'
980
+ return check_ident_or_keyword(:require, start)
981
+ end
982
+ else
983
+ # scan_ident
984
+ end
985
+ else
986
+ # scan_ident
987
+ end
988
+ scan_ident(start)
989
+ when 's'
990
+ case next_char
991
+ when 'e'
992
+ if next_char == 'l'
993
+ case next_char
994
+ when 'e'
995
+ if next_char == 'c' && next_char == 't'
996
+ return check_ident_or_keyword(:select, start)
997
+ end
998
+ when 'f'
999
+ return check_ident_or_keyword(:self, start)
1000
+ else
1001
+ # scan_ident
1002
+ end
1003
+ end
1004
+ when 'i'
1005
+ if next_char == 'z' && next_char == 'e' && next_char == 'o' && next_char == 'f'
1006
+ return check_ident_or_keyword(:sizeof, start)
1007
+ end
1008
+ when 't'
1009
+ if next_char == 'r' && next_char == 'u' && next_char == 'c' && next_char == 't'
1010
+ return check_ident_or_keyword(:struct, start)
1011
+ end
1012
+ when 'u'
1013
+ if next_char == 'p' && next_char == 'e' && next_char == 'r'
1014
+ return check_ident_or_keyword(:super, start)
1015
+ end
1016
+ else
1017
+ # scan_ident
1018
+ end
1019
+ scan_ident(start)
1020
+ when 't'
1021
+ case next_char
1022
+ when 'h'
1023
+ if next_char == 'e' && next_char == 'n'
1024
+ return check_ident_or_keyword(:then, start)
1025
+ end
1026
+ when 'r'
1027
+ if next_char == 'u' && next_char == 'e'
1028
+ return check_ident_or_keyword(:true, start)
1029
+ end
1030
+ when 'y'
1031
+ if next_char == 'p' && next_char == 'e'
1032
+ if peek_next_char == 'o'
1033
+ next_char
1034
+ if next_char == 'f'
1035
+ return check_ident_or_keyword(:typeof, start)
1036
+ end
1037
+ else
1038
+ return check_ident_or_keyword(:type, start)
1039
+ end
1040
+ end
1041
+ else
1042
+ # scan_ident
1043
+ end
1044
+ scan_ident(start)
1045
+ when 'u'
1046
+ if next_char == 'n'
1047
+ case next_char
1048
+ when 'i'
1049
+ case next_char
1050
+ when 'o'
1051
+ if next_char == 'n'
1052
+ return check_ident_or_keyword(:union, start)
1053
+ end
1054
+ when 'n'
1055
+ if next_char == 'i' && next_char == 't' && next_char == 'i' && next_char == 'a' && next_char == 'l' && next_char == 'i' && next_char == 'z' && next_char == 'e' && next_char == 'd'
1056
+ return check_ident_or_keyword(:uninitialized, start)
1057
+ end
1058
+ else
1059
+ # scan_ident
1060
+ end
1061
+ when 'l'
1062
+ if next_char == 'e' && next_char == 's' && next_char == 's'
1063
+ return check_ident_or_keyword(:unless, start)
1064
+ end
1065
+ when 't'
1066
+ if next_char == 'i' && next_char == 'l'
1067
+ return check_ident_or_keyword(:until, start)
1068
+ end
1069
+ else
1070
+ # scan_ident
1071
+ end
1072
+ end
1073
+ scan_ident(start)
1074
+ when 'v'
1075
+ if next_char == 'e' && next_char == 'r' && next_char == 'b' && next_char == 'a' && next_char == 't' && next_char == 'i' && next_char == 'm'
1076
+ return check_ident_or_keyword(:verbatim, start)
1077
+ end
1078
+ scan_ident(start)
1079
+ when 'w'
1080
+ case next_char
1081
+ when 'h'
1082
+ case next_char
1083
+ when 'e'
1084
+ if next_char == 'n'
1085
+ return check_ident_or_keyword(:when, start)
1086
+ end
1087
+ when 'i'
1088
+ if next_char == 'l' && next_char == 'e'
1089
+ return check_ident_or_keyword(:while, start)
1090
+ end
1091
+ else
1092
+ # scan_ident
1093
+ end
1094
+ when 'i'
1095
+ if next_char == 't' && next_char == 'h'
1096
+ return check_ident_or_keyword(:with, start)
1097
+ end
1098
+ else
1099
+ # scan_ident
1100
+ end
1101
+ scan_ident(start)
1102
+ when 'y'
1103
+ if next_char == 'i' && next_char == 'e' && next_char == 'l' && next_char == 'd'
1104
+ return check_ident_or_keyword(:yield, start)
1105
+ end
1106
+ scan_ident(start)
# `_` alone is an UNDERSCORE token. `__DIR__`, `__END_LINE__`, `__FILE__` and
# `__LINE__` get their own token types unless more identifier characters
# follow, in which case the text is scanned as an ordinary identifier.
1107
+ when '_'
1108
+ case next_char
1109
+ when '_'
1110
+ case next_char
1111
+ when 'D'
1112
+ if next_char == 'I' && next_char == 'R' && next_char == '_' && next_char == '_'
1113
+ if ident_part_or_end?(peek_next_char)
1114
+ scan_ident(start)
1115
+ else
1116
+ next_char
1117
+ @token.type = :__DIR__
1118
+ return @token
1119
+ end
1120
+ end
1121
+ when 'E'
1122
+ if next_char == 'N' && next_char == 'D' && next_char == '_' && next_char == 'L' && next_char == 'I' && next_char == 'N' && next_char == 'E' && next_char == '_' && next_char == '_'
1123
+ if ident_part_or_end?(peek_next_char)
1124
+ scan_ident(start)
1125
+ else
1126
+ next_char
1127
+ @token.type = :__END_LINE__
1128
+ return @token
1129
+ end
1130
+ end
1131
+ when 'F'
1132
+ if next_char == 'I' && next_char == 'L' && next_char == 'E' && next_char == '_' && next_char == '_'
1133
+ if ident_part_or_end?(peek_next_char)
1134
+ scan_ident(start)
1135
+ else
1136
+ next_char
1137
+ @token.type = :__FILE__
1138
+ return @token
1139
+ end
1140
+ end
1141
+ when 'L'
1142
+ if next_char == 'I' && next_char == 'N' && next_char == 'E' && next_char == '_' && next_char == '_'
1143
+ if ident_part_or_end?(peek_next_char)
1144
+ scan_ident(start)
1145
+ else
1146
+ next_char
1147
+ @token.type = :__LINE__
1148
+ return @token
1149
+ end
1150
+ end
1151
+ else
1152
+ # scan_ident
1153
+ end
1154
+ else
1155
+ unless ident_part?(current_char)
1156
+ @token.type = :UNDERSCORE
1157
+ return @token
1158
+ end
1159
+ end
1160
+
1161
+ scan_ident(start)
# Everything else: an uppercase ASCII letter starts a CONST; a lowercase ASCII
# letter, `_`, or a character whose ordinal is above 0x9F starts an
# identifier; any other character goes to unknown_token.
1162
+ else
1163
+ if current_char.ascii_uppercase?
1164
+ start = current_pos
1165
+ while ident_part?(next_char)
1166
+ # Nothing to do
1167
+ end
1168
+ @token.type = :CONST
1169
+ @token.value = string_range_from_pool(start)
1170
+ elsif current_char.ascii_lowercase? || current_char == '_' || current_char.ord > 0x9F
1171
+ next_char
1172
+ scan_ident(start)
1173
+ else
1174
+ unknown_token
1175
+ end
1176
+ end
1177
+
1178
+ if reset_regex_flags
1179
+ @wants_regex = true
1180
+ @slash_is_regex = false
1181
+ end
1182
+
1183
+ @token
1184
+ end
1185
+ end
1186
+ end