gloss 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ require "./lib/cr_ruby"
2
+ require "./cr_ast"
3
+ require "./rb_ast"
4
+ require "./parser"
5
+
6
# Ruby-facing overload of `parse_buffer`; `Init_gloss` registers it as the
# singleton method `Gloss.parse_buffer`.
#
# self   - receiver VALUE supplied by the Ruby call (unused here).
# buffer - Ruby VALUE holding the source text to parse.
#
# Returns a new Ruby string VALUE built from whatever the String overload of
# `parse_buffer` returns.
def parse_buffer(self : CrRuby::VALUE, buffer : CrRuby::VALUE)
  # `pointerof` needs an addressable local, so the coerced VALUE is stored first.
  rb_source = CrRuby.rb_str_to_str(buffer)
  source = String.new(CrRuby.rb_string_value_cstr(pointerof(rb_source)))

  CrRuby.rb_str_new_cstr(parse_buffer(source))
end
15
+
16
# Parses `buffer` with `Gloss::Parser` and returns the resulting tree,
# converted with `to_rb`, serialised as JSON.
#
# When the parser raises `Crystal::SyntaxException`, the backtrace is printed
# with `pp` and the exception's message string is returned in place of JSON.
def parse_buffer(buffer : String)
  Gloss::Parser.parse(buffer).to_rb.to_json
rescue syntax_error : Crystal::SyntaxException
  pp syntax_error.backtrace
  syntax_error.to_s
end
25
+
26
# Exported under the C symbol `Init_gloss`.
# NOTE(review): presumably invoked by Ruby when the extension is required -- confirm.
#
# Initialises the GC, runs the Crystal main routine with an empty argv, then
# defines the `Gloss` module and exposes `parse_buffer` on it as a singleton
# method taking one argument.
fun init = Init_gloss
  GC.init
  LibCrystalMain.__crystal_main(0, Pointer(Pointer(UInt8)).null)

  gloss_module = CrRuby.rb_define_module("Gloss")
  CrRuby.rb_define_singleton_method(
    gloss_module,
    "parse_buffer",
    ->parse_buffer(CrRuby::VALUE, CrRuby::VALUE),
    1
  )
end
@@ -0,0 +1,1186 @@
1
+ require "compiler/crystal/syntax/*"
2
+
3
# Defines `Crystal::Lexer#next_token`.
# NOTE(review): the `require "compiler/crystal/syntax/*"` earlier in this file
# presumably loads the stock lexer, which this definition then overrides -- confirm.
+ module Crystal
4
+ class Lexer
# Scans one token starting at `current_char`, stores it in `@token` and
# returns `@token`.
#
# Leading `#` comments are handled first: `#<loc:` pragmas are consumed,
# otherwise the comment is consumed as doc, returned as a comment token, or
# skipped, depending on `@doc_enabled` / `@comments_enabled`. The remainder
# is one large `case` on the current character. Keyword-looking identifiers
# are matched character by character with `next_char` and handed to
# `check_ident_or_keyword`; anything that fails to match falls through to
# `scan_ident(start)`.
5
+ def next_token
6
+ reset_token
7
+
8
+ # Skip comments
9
+ while current_char == '#'
10
+ start = current_pos
11
+
12
+ # Check #<loc:...> pragma comment
13
+ if next_char_no_column_increment == '<' &&
14
+ next_char_no_column_increment == 'l' &&
15
+ next_char_no_column_increment == 'o' &&
16
+ next_char_no_column_increment == 'c' &&
17
+ next_char_no_column_increment == ':'
18
+ next_char_no_column_increment
19
+ consume_loc_pragma
20
+ start = current_pos
21
+ else
22
+ if @doc_enabled
23
+ consume_doc
24
+ elsif @comments_enabled
25
+ return consume_comment(start)
26
+ else
27
+ skip_comment
28
+ end
29
+ end
30
+ end
31
+
32
+ start = current_pos
33
+
34
+ # Fix location by `macro_expansion_pragmas`.
35
+ if me_pragmas = macro_expansion_pragmas
36
+ # It might happen that the current "start" already passed some
37
+ # location pragmas, so we must consume all of those. For example
38
+ # if one does `@{{...}}` inside a macro, "start" will be one less
39
+ # number than the pragma, and after consuming "@..." if we don't
40
+ # consume the pragmas generated by `{{...}}` we'll be in an
41
+ # incorrect location.
42
+ while me_pragmas.first_key?.try &.<=(start)
43
+ _, pragmas = me_pragmas.shift
44
+ pragmas.each &.run_pragma self
45
+ end
46
+ end
47
+
# Branches below set this to false (whitespace, backslash-newline, "\n", ';')
# to keep `@wants_regex` / `@slash_is_regex` untouched at the end of the method.
48
+ reset_regex_flags = true
49
+
50
+ case current_char
51
+ when '\0'
52
+ @token.type = :EOF
53
+ when ' ', '\t'
54
+ consume_whitespace
55
+ reset_regex_flags = false
56
+ when '\\'
57
+ if next_char == '\n'
58
+ incr_line_number
59
+ @token.passed_backslash_newline = true
60
+ consume_whitespace
61
+ reset_regex_flags = false
62
+ else
63
+ unknown_token
64
+ end
65
+ when '\n'
66
+ @token.type = :NEWLINE
67
+ next_char
68
+ incr_line_number
69
+ reset_regex_flags = false
70
+ consume_newlines
# NOTE(review): unlike the '\n' branch above, this "\r\n" branch leaves
# reset_regex_flags set to true -- confirm that the difference is intended.
71
+ when '\r'
72
+ if next_char == '\n'
73
+ next_char
74
+ @token.type = :NEWLINE
75
+ incr_line_number
76
+ consume_newlines
77
+ else
78
+ raise "expected '\\n' after '\\r'"
79
+ end
80
+ when '='
81
+ case next_char
82
+ when '='
83
+ case next_char
84
+ when '='
85
+ next_char :"==="
86
+ else
87
+ @token.type = :"=="
88
+ end
89
+ when '>'
90
+ next_char :"=>"
91
+ when '~'
92
+ next_char :"=~"
93
+ else
94
+ @token.type = :"="
95
+ end
96
+ when '!'
97
+ case next_char
98
+ when '='
99
+ next_char :"!="
100
+ when '~'
101
+ next_char :"!~"
102
+ else
103
+ @token.type = :"!"
104
+ end
105
+ when '<'
106
+ case next_char
107
+ when '='
108
+ case next_char
109
+ when '>'
110
+ next_char :"<=>"
111
+ else
112
+ @token.type = :"<="
113
+ end
114
+ when '<'
115
+ case next_char
116
+ when '='
117
+ next_char :"<<="
# `<<-` starts a heredoc: read its identifier, optionally wrapped in single
# quotes (a quoted identifier turns escapes off via `allow_escapes` below).
118
+ when '-'
119
+ has_single_quote = false
120
+ found_closing_single_quote = false
121
+
122
+ char = next_char
123
+ start_here = current_pos
124
+
125
+ if char == '\''
126
+ has_single_quote = true
127
+ char = next_char
128
+ start_here = current_pos
129
+ end
130
+
131
+ unless ident_part?(char)
132
+ raise "heredoc identifier starts with invalid character"
133
+ end
134
+
135
+ end_here = 0
136
+
137
+ while true
138
+ char = next_char
139
+ case
140
+ when char == '\r'
141
+ if peek_next_char == '\n'
142
+ end_here = current_pos
143
+ next_char
144
+ break
145
+ else
146
+ raise "expecting '\\n' after '\\r'"
147
+ end
148
+ when char == '\n'
149
+ end_here = current_pos
150
+ break
151
+ when ident_part?(char)
152
+ # ok
153
+ when char == '\0'
154
+ raise "Unexpected EOF on heredoc identifier"
155
+ else
156
+ if char == '\'' && has_single_quote
157
+ found_closing_single_quote = true
158
+ end_here = current_pos
159
+ next_char
160
+ break
161
+ elsif has_single_quote
162
+ # wait until another quote
163
+ else
164
+ end_here = current_pos
165
+ break
166
+ end
167
+ end
168
+ end
169
+
170
+ if has_single_quote && !found_closing_single_quote
171
+ raise "expecting closing single quote"
172
+ end
173
+
174
+ here = string_range(start_here, end_here)
175
+
176
+ delimited_pair :heredoc, here, here, start, allow_escapes: !has_single_quote, advance: false
177
+ else
178
+ @token.type = :"<<"
179
+ end
180
+ else
181
+ @token.type = :"<"
182
+ end
183
+ when '>'
184
+ case next_char
185
+ when '='
186
+ next_char :">="
187
+ when '>'
188
+ case next_char
189
+ when '='
190
+ next_char :">>="
191
+ else
192
+ @token.type = :">>"
193
+ end
194
+ else
195
+ @token.type = :">"
196
+ end
197
+ when '+'
198
+ @token.start = start
199
+ case next_char
200
+ when '='
201
+ next_char :"+="
202
+ when '0'
203
+ scan_zero_number(start)
204
+ when '1', '2', '3', '4', '5', '6', '7', '8', '9'
205
+ scan_number(start)
206
+ when '+'
207
+ raise "postfix increment is not supported, use `exp += 1`"
208
+ else
209
+ @token.type = :"+"
210
+ end
211
+ when '-'
212
+ @token.start = start
213
+ case next_char
214
+ when '='
215
+ next_char :"-="
216
+ when '>'
217
+ next_char :"->"
218
+ when '0'
219
+ scan_zero_number start, negative: true
220
+ when '1', '2', '3', '4', '5', '6', '7', '8', '9'
221
+ scan_number start, negative: true
222
+ when '-'
223
+ raise "postfix decrement is not supported, use `exp -= 1`"
224
+ else
225
+ @token.type = :"-"
226
+ end
227
+ when '*'
228
+ case next_char
229
+ when '='
230
+ next_char :"*="
231
+ when '*'
232
+ case next_char
233
+ when '='
234
+ next_char :"**="
235
+ else
236
+ @token.type = :"**"
237
+ end
238
+ else
239
+ @token.type = :"*"
240
+ end
# '/' yields `//`, `//=`, `/=`, a plain `/`, or the start of a regex literal,
# depending on @wants_def_or_macro_name, @slash_is_regex and @wants_regex.
241
+ when '/'
242
+ line = @line_number
243
+ column = @column_number
244
+ char = next_char
245
+ if (@wants_def_or_macro_name || !@slash_is_regex) && char == '/'
246
+ case next_char
247
+ when '='
248
+ next_char :"//="
249
+ else
250
+ @token.type = :"//"
251
+ end
252
+ elsif !@slash_is_regex && char == '='
253
+ next_char :"/="
254
+ elsif @wants_def_or_macro_name
255
+ @token.type = :"/"
256
+ elsif @slash_is_regex
257
+ @token.type = :DELIMITER_START
258
+ @token.delimiter_state = Token::DelimiterState.new(:regex, '/', '/')
259
+ @token.raw = "/"
260
+ elsif char.ascii_whitespace? || char == '\0'
261
+ @token.type = :"/"
262
+ elsif @wants_regex
263
+ @token.type = :DELIMITER_START
264
+ @token.delimiter_state = Token::DelimiterState.new(:regex, '/', '/')
265
+ @token.raw = "/"
266
+ else
267
+ @token.type = :"/"
268
+ end
269
+ when '%'
270
+ if @wants_def_or_macro_name
271
+ next_char :"%"
272
+ else
273
+ case next_char
274
+ when '='
275
+ next_char :"%="
276
+ when '(', '[', '{', '<', '|'
277
+ delimited_pair :string, current_char, closing_char, start
278
+ when 'i'
279
+ case peek_next_char
280
+ when '(', '{', '[', '<', '|'
281
+ start_char = next_char
282
+ next_char :SYMBOL_ARRAY_START
283
+ @token.raw = "%i#{start_char}" if @wants_raw
284
+ @token.delimiter_state = Token::DelimiterState.new(:symbol_array, start_char, closing_char(start_char))
285
+ else
286
+ @token.type = :"%"
287
+ end
288
+ when 'q'
289
+ case peek_next_char
290
+ when '(', '{', '[', '<', '|'
291
+ next_char
292
+ delimited_pair :string, current_char, closing_char, start, allow_escapes: false
293
+ else
294
+ @token.type = :"%"
295
+ end
296
+ when 'Q'
297
+ case peek_next_char
298
+ when '(', '{', '[', '<', '|'
299
+ next_char
300
+ delimited_pair :string, current_char, closing_char, start
301
+ else
302
+ @token.type = :"%"
303
+ end
304
+ when 'r'
305
+ case next_char
306
+ when '(', '[', '{', '<', '|'
307
+ delimited_pair :regex, current_char, closing_char, start
308
+ else
309
+ raise "unknown %r char"
310
+ end
311
+ when 'x'
312
+ case next_char
313
+ when '(', '[', '{', '<', '|'
314
+ delimited_pair :command, current_char, closing_char, start
315
+ else
316
+ raise "unknown %x char"
317
+ end
318
+ when 'w'
319
+ case peek_next_char
320
+ when '(', '{', '[', '<', '|'
321
+ start_char = next_char
322
+ next_char :STRING_ARRAY_START
323
+ @token.raw = "%w#{start_char}" if @wants_raw
324
+ @token.delimiter_state = Token::DelimiterState.new(:string_array, start_char, closing_char(start_char))
325
+ else
326
+ @token.type = :"%"
327
+ end
328
+ when '}'
329
+ next_char :"%}"
330
+ else
331
+ @token.type = :"%"
332
+ end
333
+ end
334
+ when '(' then next_char :"("
335
+ when ')' then next_char :")"
336
+ when '{'
337
+ char = next_char
338
+ case char
339
+ when '%'
340
+ next_char :"{%"
341
+ when '{'
342
+ next_char :"{{"
343
+ else
344
+ @token.type = :"{"
345
+ end
346
+ when '}' then next_char :"}"
347
+ when '['
348
+ case next_char
349
+ when ']'
350
+ case next_char
351
+ when '='
352
+ next_char :"[]="
353
+ when '?'
354
+ next_char :"[]?"
355
+ else
356
+ @token.type = :"[]"
357
+ end
358
+ else
359
+ @token.type = :"["
360
+ end
361
+ when ']' then next_char :"]"
362
+ when ',' then next_char :","
363
+ when '?' then next_char :"?"
364
+ when ';'
365
+ reset_regex_flags = false
366
+ next_char :";"
# ':' is either `::`, a plain `:`, or (only when @wants_symbol is set) the
# start of a symbol literal: operator symbols, quoted symbols, identifiers.
367
+ when ':'
368
+ char = next_char
369
+
370
+ if @wants_symbol
371
+ case char
372
+ when ':'
373
+ next_char :"::"
374
+ when '+'
375
+ next_char_and_symbol "+"
376
+ when '-'
377
+ next_char_and_symbol "-"
378
+ when '*'
379
+ if next_char == '*'
380
+ next_char_and_symbol "**"
381
+ else
382
+ symbol "*"
383
+ end
384
+ when '/'
385
+ case next_char
386
+ when '/'
387
+ next_char_and_symbol "//"
388
+ else
389
+ symbol "/"
390
+ end
391
+ when '='
392
+ case next_char
393
+ when '='
394
+ if next_char == '='
395
+ next_char_and_symbol "==="
396
+ else
397
+ symbol "=="
398
+ end
399
+ when '~'
400
+ next_char_and_symbol "=~"
401
+ else
402
+ unknown_token
403
+ end
404
+ when '!'
405
+ case next_char
406
+ when '='
407
+ next_char_and_symbol "!="
408
+ when '~'
409
+ next_char_and_symbol "!~"
410
+ else
411
+ symbol "!"
412
+ end
413
+ when '<'
414
+ case next_char
415
+ when '='
416
+ if next_char == '>'
417
+ next_char_and_symbol "<=>"
418
+ else
419
+ symbol "<="
420
+ end
421
+ when '<'
422
+ next_char_and_symbol "<<"
423
+ else
424
+ symbol "<"
425
+ end
426
+ when '>'
427
+ case next_char
428
+ when '='
429
+ next_char_and_symbol ">="
430
+ when '>'
431
+ next_char_and_symbol ">>"
432
+ else
433
+ symbol ">"
434
+ end
435
+ when '&'
436
+ case next_char
437
+ when '+'
438
+ next_char_and_symbol "&+"
439
+ when '-'
440
+ next_char_and_symbol "&-"
441
+ when '*'
442
+ case next_char
443
+ when '*'
444
+ next_char_and_symbol "&**"
445
+ else
446
+ symbol "&*"
447
+ end
448
+ else
449
+ symbol "&"
450
+ end
451
+ when '|'
452
+ next_char_and_symbol "|"
453
+ when '^'
454
+ next_char_and_symbol "^"
455
+ when '~'
456
+ next_char_and_symbol "~"
457
+ when '%'
458
+ next_char_and_symbol "%"
459
+ when '['
460
+ if next_char == ']'
461
+ case next_char
462
+ when '='
463
+ next_char_and_symbol "[]="
464
+ when '?'
465
+ next_char_and_symbol "[]?"
466
+ else
467
+ symbol "[]"
468
+ end
469
+ else
470
+ unknown_token
471
+ end
472
+ when '"'
473
+ line = @line_number
474
+ column = @column_number
475
+ start = current_pos + 1
476
+ io = IO::Memory.new
477
+ while true
478
+ char = next_char
479
+ case char
480
+ when '\\'
481
+ case char = next_char
482
+ when 'a'
483
+ io << '\a'
484
+ when 'b'
485
+ io << '\b'
486
+ when 'n'
487
+ io << '\n'
488
+ when 'r'
489
+ io << '\r'
490
+ when 't'
491
+ io << '\t'
492
+ when 'v'
493
+ io << '\v'
494
+ when 'f'
495
+ io << '\f'
496
+ when 'e'
497
+ io << '\e'
498
+ when 'x'
499
+ io.write_byte consume_string_hex_escape
500
+ when 'u'
501
+ io << consume_string_unicode_escape
502
+ when '0', '1', '2', '3', '4', '5', '6', '7'
503
+ io.write_byte consume_octal_escape(char)
504
+ when '\n'
505
+ incr_line_number nil
506
+ io << '\n'
507
+ when '\0'
508
+ raise "unterminated quoted symbol", line, column
509
+ else
510
+ io << char
511
+ end
512
+ when '"'
513
+ break
514
+ when '\0'
515
+ raise "unterminated quoted symbol", line, column
516
+ else
517
+ io << char
518
+ end
519
+ end
520
+
521
+ @token.type = :SYMBOL
522
+ @token.value = io.to_s
523
+ next_char
524
+ set_token_raw_from_start(start - 2)
525
+ else
526
+ if ident_start?(char)
527
+ start = current_pos
528
+ while ident_part?(next_char)
529
+ # Nothing to do
530
+ end
531
+ if current_char == '?' || ((current_char == '!' || current_char == '=') && peek_next_char != '=')
532
+ next_char
533
+ end
534
+ @token.type = :SYMBOL
535
+ @token.value = string_range_from_pool(start)
536
+ set_token_raw_from_start(start - 1)
537
+ else
538
+ @token.type = :":"
539
+ end
540
+ end
541
+ else
542
+ case char
543
+ when ':'
544
+ next_char :"::"
545
+ else
546
+ @token.type = :":"
547
+ end
548
+ end
549
+ when '~'
550
+ next_char :"~"
551
+ when '.'
552
+ case next_char
553
+ when '.'
554
+ case next_char
555
+ when '.'
556
+ next_char :"..."
557
+ else
558
+ @token.type = :".."
559
+ end
560
+ else
561
+ @token.type = :"."
562
+ end
563
+ when '&'
564
+ case next_char
565
+ when '&'
566
+ case next_char
567
+ when '='
568
+ next_char :"&&="
569
+ else
570
+ @token.type = :"&&"
571
+ end
572
+ when '='
573
+ next_char :"&="
574
+ when '+'
575
+ case next_char
576
+ when '='
577
+ next_char :"&+="
578
+ else
579
+ @token.type = :"&+"
580
+ end
581
+ when '-'
582
+ # Check if '>' comes after '&-', making it '&->'.
583
+ # We want to parse that like '&(->...)',
584
+ # so we only return '&' for now.
585
+ if peek_next_char == '>'
586
+ @token.type = :"&"
587
+ else
588
+ case next_char
589
+ when '='
590
+ next_char :"&-="
591
+ else
592
+ @token.type = :"&-"
593
+ end
594
+ end
595
+ when '*'
596
+ case next_char
597
+ when '*'
598
+ next_char :"&**"
599
+ when '='
600
+ next_char :"&*="
601
+ else
602
+ @token.type = :"&*"
603
+ end
604
+ else
605
+ @token.type = :"&"
606
+ end
607
+ when '|'
608
+ case next_char
609
+ when '|'
610
+ case next_char
611
+ when '='
612
+ next_char :"||="
613
+ else
614
+ @token.type = :"||"
615
+ end
616
+ when '='
617
+ next_char :"|="
618
+ else
619
+ @token.type = :"|"
620
+ end
621
+ when '^'
622
+ case next_char
623
+ when '='
624
+ next_char :"^="
625
+ else
626
+ @token.type = :"^"
627
+ end
628
+ when '"', '`', '\''
629
+ delimiter = current_char
630
+ if delimiter == '`' && @wants_def_or_macro_name
631
+ next_char :"`"
632
+ else
633
+ next_char
634
+ @token.type = :DELIMITER_START
635
+ @token.delimiter_state = Token::DelimiterState.new(delimiter == '`' ? :command : :string, delimiter, delimiter)
636
+ set_token_raw_from_start(start)
637
+ end
638
+ when '0'
639
+ scan_zero_number(start)
640
+ when '1', '2', '3', '4', '5', '6', '7', '8', '9'
641
+ scan_number current_pos
642
+ when '@'
643
+ start = current_pos
644
+ case next_char
645
+ when '['
646
+ next_char :"@["
647
+ else
648
+ class_var = false
649
+ if current_char == '@'
650
+ class_var = true
651
+ next_char
652
+ end
653
+ if ident_start?(current_char)
654
+ while ident_part?(next_char)
655
+ # Nothing to do
656
+ end
657
+ @token.type = class_var ? :CLASS_VAR : :INSTANCE_VAR
658
+ @token.value = string_range_from_pool(start)
659
+ else
660
+ unknown_token
661
+ end
662
+ end
663
+ when '$'
664
+ start = current_pos
665
+ next_char
666
+ case current_char
667
+ when '~'
668
+ next_char
669
+ @token.type = :"$~"
670
+ when '?'
671
+ next_char
672
+ @token.type = :"$?"
673
+ when .ascii_number?
674
+ start = current_pos
675
+ char = next_char
676
+ if char == '0'
677
+ char = next_char
678
+ else
679
+ while char.ascii_number?
680
+ char = next_char
681
+ end
682
+ char = next_char if char == '?'
683
+ end
684
+ @token.type = :GLOBAL_MATCH_DATA_INDEX
685
+ @token.value = string_range_from_pool(start)
686
+ else
687
+ if ident_start?(current_char)
688
+ while ident_part?(next_char)
689
+ # Nothing to do
690
+ end
691
+ @token.type = :GLOBAL
692
+ @token.value = string_range_from_pool(start)
693
+ else
694
+ unknown_token
695
+ end
696
+ end
# Keyword recognition starts here. Each letter branch advances with
# `next_char` while the input keeps matching a known keyword and returns via
# `check_ident_or_keyword`; any mismatch falls out of the inner `case` to the
# `scan_ident(start)` call that ends the branch.
697
+ when 'a'
698
+ case next_char
699
+ when 'b'
700
+ if next_char == 's' && next_char == 't' && next_char == 'r' && next_char == 'a' && next_char == 'c' && next_char == 't'
701
+ return check_ident_or_keyword(:abstract, start)
702
+ end
703
+ when 'l'
704
+ if next_char == 'i' && next_char == 'a' && next_char == 's'
705
+ return check_ident_or_keyword(:alias, start)
706
+ end
707
+ when 's'
708
+ peek = peek_next_char
709
+ case peek
710
+ when 'm'
711
+ next_char
712
+ return check_ident_or_keyword(:asm, start)
713
+ when '?'
714
+ next_char
715
+ next_char
716
+ @token.type = :IDENT
717
+ @token.value = :as?
718
+ return @token
719
+ else
720
+ return check_ident_or_keyword(:as, start)
721
+ end
722
+ when 'n'
723
+ case next_char
724
+ when 'd'
# NOTE(review): this branch neither returns nor advances past 'd', so control
# reaches `scan_ident(start)` below. If `scan_ident` (defined elsewhere)
# assigns the token type, this assignment has no effect -- confirm intent.
725
+ @token.type = :and
726
+ when 'n'
727
+ if next_char == 'o' && next_char == 't' && next_char == 'a' && next_char == 't' && next_char == 'i' && next_char == 'o' && next_char == 'n'
728
+ return check_ident_or_keyword(:annotation, start)
729
+ end
730
+ end
731
+ else
732
+ # scan_ident
733
+ end
734
+ scan_ident(start)
735
+ when 'b'
736
+ case next_char
737
+ when 'e'
738
+ if next_char == 'g' && next_char == 'i' && next_char == 'n'
739
+ return check_ident_or_keyword(:begin, start)
740
+ end
741
+ when 'r'
742
+ if next_char == 'e' && next_char == 'a' && next_char == 'k'
743
+ return check_ident_or_keyword(:break, start)
744
+ end
745
+ else
746
+ # scan_ident
747
+ end
748
+ scan_ident(start)
749
+ when 'c'
750
+ case next_char
751
+ when 'a'
752
+ if next_char == 's' && next_char == 'e'
753
+ return check_ident_or_keyword(:case, start)
754
+ end
755
+ when 'l'
756
+ if next_char == 'a' && next_char == 's' && next_char == 's'
757
+ return check_ident_or_keyword(:class, start)
758
+ end
759
+ else
760
+ # scan_ident
761
+ end
762
+ scan_ident(start)
763
+ when 'd'
764
+ case next_char
765
+ when 'e'
766
+ if next_char == 'f'
767
+ return check_ident_or_keyword(:def, start)
768
+ end
769
+ when 'o' then return check_ident_or_keyword(:do, start)
770
+ else
771
+ # scan_ident
772
+ end
773
+ scan_ident(start)
774
+ when 'e'
775
+ case next_char
776
+ when 'l'
777
+ case next_char
778
+ when 's'
779
+ case next_char
780
+ when 'e' then return check_ident_or_keyword(:else, start)
781
+ when 'i'
782
+ if next_char == 'f'
783
+ return check_ident_or_keyword(:elsif, start)
784
+ end
785
+ else
786
+ # scan_ident
787
+ end
788
+ else
789
+ # scan_ident
790
+ end
791
+ when 'n'
792
+ case next_char
793
+ when 'd'
794
+ return check_ident_or_keyword(:end, start)
795
+ when 's'
796
+ if next_char == 'u' && next_char == 'r' && next_char == 'e'
797
+ return check_ident_or_keyword(:ensure, start)
798
+ end
799
+ when 'u'
800
+ if next_char == 'm'
801
+ return check_ident_or_keyword(:enum, start)
802
+ end
803
+ else
804
+ # scan_ident
805
+ end
806
+ when 'x'
807
+ if next_char == 't' && next_char == 'e' && next_char == 'n' && next_char == 'd'
808
+ return check_ident_or_keyword(:extend, start)
809
+ end
810
+ else
811
+ # scan_ident
812
+ end
813
+ scan_ident(start)
814
+ when 'f'
815
+ case next_char
816
+ when 'a'
817
+ if next_char == 'l' && next_char == 's' && next_char == 'e'
818
+ return check_ident_or_keyword(:false, start)
819
+ end
820
+ when 'o'
821
+ if next_char == 'r'
822
+ return check_ident_or_keyword(:for, start)
823
+ end
824
+ when 'u'
825
+ if next_char == 'n'
826
+ return check_ident_or_keyword(:fun, start)
827
+ end
828
+ else
829
+ # scan_ident
830
+ end
831
+ scan_ident(start)
832
+ when 'i'
833
+ case next_char
834
+ when 'f'
835
+ return check_ident_or_keyword(:if, start)
836
+ when 'n'
837
+ if ident_part_or_end?(peek_next_char)
838
+ case next_char
839
+ when 'c'
840
+ if next_char == 'l' && next_char == 'u' && next_char == 'd' && next_char == 'e'
841
+ return check_ident_or_keyword(:include, start)
842
+ end
843
+ when 's'
844
+ if next_char == 't' && next_char == 'a' && next_char == 'n' && next_char == 'c' && next_char == 'e' && next_char == '_' && next_char == 's' && next_char == 'i' && next_char == 'z' && next_char == 'e' && next_char == 'o' && next_char == 'f'
845
+ return check_ident_or_keyword(:instance_sizeof, start)
846
+ end
847
+ else
848
+ # scan_ident
849
+ end
850
+ else
851
+ next_char
852
+ @token.type = :IDENT
853
+ @token.value = :in
854
+ return @token
855
+ end
856
+ when 's'
857
+ if next_char == '_' && next_char == 'a' && next_char == '?'
858
+ return check_ident_or_keyword(:is_a?, start)
859
+ end
860
+ else
861
+ # scan_ident
862
+ end
863
+ scan_ident(start)
864
+ when 'l'
865
+ case next_char
866
+ when 'i'
867
+ if next_char == 'b'
868
+ return check_ident_or_keyword(:lib, start)
869
+ end
870
+ else
871
+ # scan_ident
872
+ end
873
+ scan_ident(start)
874
+ when 'm'
875
+ case next_char
876
+ when 'a'
877
+ if next_char == 'c' && next_char == 'r' && next_char == 'o'
878
+ return check_ident_or_keyword(:macro, start)
879
+ end
880
+ when 'o'
881
+ case next_char
882
+ when 'd'
883
+ if next_char == 'u' && next_char == 'l' && next_char == 'e'
884
+ return check_ident_or_keyword(:module, start)
885
+ end
886
+ else
887
+ # scan_ident
888
+ end
889
+ else
890
+ # scan_ident
891
+ end
892
+ scan_ident(start)
893
+ when 'n'
894
+ case next_char
895
+ when 'e'
896
+ if next_char == 'x' && next_char == 't'
897
+ return check_ident_or_keyword(:next, start)
898
+ end
899
+ when 'i'
900
+ case next_char
901
+ when 'l'
902
+ if peek_next_char == '?'
903
+ next_char
904
+ return check_ident_or_keyword(:nil?, start)
905
+ else
906
+ return check_ident_or_keyword(:nil, start)
907
+ end
908
+ else
909
+ # scan_ident
910
+ end
911
+ else
912
+ # scan_ident
913
+ end
914
+ scan_ident(start)
915
+ when 'o'
916
+ case next_char
917
+ when 'f'
918
+ if peek_next_char == 'f'
919
+ next_char
920
+ if next_char == 's' && next_char == 'e' && next_char == 't' && next_char == 'o' && next_char == 'f'
921
+ return check_ident_or_keyword(:offsetof, start)
922
+ end
923
+ else
924
+ return check_ident_or_keyword(:of, start)
925
+ end
926
+ when 'u'
927
+ if next_char == 't'
928
+ return check_ident_or_keyword(:out, start)
929
+ end
930
+ else
931
+ # scan_ident
932
+ end
933
+ scan_ident(start)
934
+ when 'p'
935
+ case next_char
936
+ when 'o'
937
+ if next_char == 'i' && next_char == 'n' && next_char == 't' && next_char == 'e' && next_char == 'r' && next_char == 'o' && next_char == 'f'
938
+ return check_ident_or_keyword(:pointerof, start)
939
+ end
940
+ when 'r'
941
+ case next_char
942
+ when 'i'
943
+ if next_char == 'v' && next_char == 'a' && next_char == 't' && next_char == 'e'
944
+ return check_ident_or_keyword(:private, start)
945
+ end
946
+ when 'o'
947
+ if next_char == 't' && next_char == 'e' && next_char == 'c' && next_char == 't' && next_char == 'e' && next_char == 'd'
948
+ return check_ident_or_keyword(:protected, start)
949
+ end
950
+ else
951
+ # scan_ident
952
+ end
953
+ else
954
+ # scan_ident
955
+ end
956
+ scan_ident(start)
957
+ when 'r'
958
+ case next_char
959
+ when 'e'
960
+ case next_char
961
+ when 's'
962
+ case next_char
963
+ when 'c'
964
+ if next_char == 'u' && next_char == 'e'
965
+ return check_ident_or_keyword(:rescue, start)
966
+ end
967
+ when 'p'
968
+ if next_char == 'o' && next_char == 'n' && next_char == 'd' && next_char == 's' && next_char == '_' && next_char == 't' && next_char == 'o' && next_char == '?'
969
+ return check_ident_or_keyword(:responds_to?, start)
970
+ end
971
+ else
972
+ # scan_ident
973
+ end
974
+ when 't'
975
+ if next_char == 'u' && next_char == 'r' && next_char == 'n'
976
+ return check_ident_or_keyword(:return, start)
977
+ end
978
+ when 'q'
979
+ if next_char == 'u' && next_char == 'i' && next_char == 'r' && next_char == 'e'
980
+ return check_ident_or_keyword(:require, start)
981
+ end
982
+ else
983
+ # scan_ident
984
+ end
985
+ else
986
+ # scan_ident
987
+ end
988
+ scan_ident(start)
989
+ when 's'
990
+ case next_char
991
+ when 'e'
992
+ if next_char == 'l'
993
+ case next_char
994
+ when 'e'
995
+ if next_char == 'c' && next_char == 't'
996
+ return check_ident_or_keyword(:select, start)
997
+ end
998
+ when 'f'
999
+ return check_ident_or_keyword(:self, start)
1000
+ else
1001
+ # scan_ident
1002
+ end
1003
+ end
1004
+ when 'i'
1005
+ if next_char == 'z' && next_char == 'e' && next_char == 'o' && next_char == 'f'
1006
+ return check_ident_or_keyword(:sizeof, start)
1007
+ end
1008
+ when 't'
1009
+ if next_char == 'r' && next_char == 'u' && next_char == 'c' && next_char == 't'
1010
+ return check_ident_or_keyword(:struct, start)
1011
+ end
1012
+ when 'u'
1013
+ if next_char == 'p' && next_char == 'e' && next_char == 'r'
1014
+ return check_ident_or_keyword(:super, start)
1015
+ end
1016
+ else
1017
+ # scan_ident
1018
+ end
1019
+ scan_ident(start)
1020
+ when 't'
1021
+ case next_char
1022
+ when 'h'
1023
+ if next_char == 'e' && next_char == 'n'
1024
+ return check_ident_or_keyword(:then, start)
1025
+ end
1026
+ when 'r'
1027
+ if next_char == 'u' && next_char == 'e'
1028
+ return check_ident_or_keyword(:true, start)
1029
+ end
1030
+ when 'y'
1031
+ if next_char == 'p' && next_char == 'e'
1032
+ if peek_next_char == 'o'
1033
+ next_char
1034
+ if next_char == 'f'
1035
+ return check_ident_or_keyword(:typeof, start)
1036
+ end
1037
+ else
1038
+ return check_ident_or_keyword(:type, start)
1039
+ end
1040
+ end
1041
+ else
1042
+ # scan_ident
1043
+ end
1044
+ scan_ident(start)
1045
+ when 'u'
1046
+ if next_char == 'n'
1047
+ case next_char
1048
+ when 'i'
1049
+ case next_char
1050
+ when 'o'
1051
+ if next_char == 'n'
1052
+ return check_ident_or_keyword(:union, start)
1053
+ end
1054
+ when 'n'
1055
+ if next_char == 'i' && next_char == 't' && next_char == 'i' && next_char == 'a' && next_char == 'l' && next_char == 'i' && next_char == 'z' && next_char == 'e' && next_char == 'd'
1056
+ return check_ident_or_keyword(:uninitialized, start)
1057
+ end
1058
+ else
1059
+ # scan_ident
1060
+ end
1061
+ when 'l'
1062
+ if next_char == 'e' && next_char == 's' && next_char == 's'
1063
+ return check_ident_or_keyword(:unless, start)
1064
+ end
1065
+ when 't'
1066
+ if next_char == 'i' && next_char == 'l'
1067
+ return check_ident_or_keyword(:until, start)
1068
+ end
1069
+ else
1070
+ # scan_ident
1071
+ end
1072
+ end
1073
+ scan_ident(start)
1074
+ when 'v'
1075
+ if next_char == 'e' && next_char == 'r' && next_char == 'b' && next_char == 'a' && next_char == 't' && next_char == 'i' && next_char == 'm'
1076
+ return check_ident_or_keyword(:verbatim, start)
1077
+ end
1078
+ scan_ident(start)
1079
+ when 'w'
1080
+ case next_char
1081
+ when 'h'
1082
+ case next_char
1083
+ when 'e'
1084
+ if next_char == 'n'
1085
+ return check_ident_or_keyword(:when, start)
1086
+ end
1087
+ when 'i'
1088
+ if next_char == 'l' && next_char == 'e'
1089
+ return check_ident_or_keyword(:while, start)
1090
+ end
1091
+ else
1092
+ # scan_ident
1093
+ end
1094
+ when 'i'
1095
+ if next_char == 't' && next_char == 'h'
1096
+ return check_ident_or_keyword(:with, start)
1097
+ end
1098
+ else
1099
+ # scan_ident
1100
+ end
1101
+ scan_ident(start)
1102
+ when 'y'
1103
+ if next_char == 'i' && next_char == 'e' && next_char == 'l' && next_char == 'd'
1104
+ return check_ident_or_keyword(:yield, start)
1105
+ end
1106
+ scan_ident(start)
# '_' alone is the UNDERSCORE token; `__DIR__`, `__END_LINE__`, `__FILE__`
# and `__LINE__` get their own token types unless more identifier characters
# follow, in which case the text is scanned as an ordinary identifier.
1107
+ when '_'
1108
+ case next_char
1109
+ when '_'
1110
+ case next_char
1111
+ when 'D'
1112
+ if next_char == 'I' && next_char == 'R' && next_char == '_' && next_char == '_'
1113
+ if ident_part_or_end?(peek_next_char)
1114
+ scan_ident(start)
1115
+ else
1116
+ next_char
1117
+ @token.type = :__DIR__
1118
+ return @token
1119
+ end
1120
+ end
1121
+ when 'E'
1122
+ if next_char == 'N' && next_char == 'D' && next_char == '_' && next_char == 'L' && next_char == 'I' && next_char == 'N' && next_char == 'E' && next_char == '_' && next_char == '_'
1123
+ if ident_part_or_end?(peek_next_char)
1124
+ scan_ident(start)
1125
+ else
1126
+ next_char
1127
+ @token.type = :__END_LINE__
1128
+ return @token
1129
+ end
1130
+ end
1131
+ when 'F'
1132
+ if next_char == 'I' && next_char == 'L' && next_char == 'E' && next_char == '_' && next_char == '_'
1133
+ if ident_part_or_end?(peek_next_char)
1134
+ scan_ident(start)
1135
+ else
1136
+ next_char
1137
+ @token.type = :__FILE__
1138
+ return @token
1139
+ end
1140
+ end
1141
+ when 'L'
1142
+ if next_char == 'I' && next_char == 'N' && next_char == 'E' && next_char == '_' && next_char == '_'
1143
+ if ident_part_or_end?(peek_next_char)
1144
+ scan_ident(start)
1145
+ else
1146
+ next_char
1147
+ @token.type = :__LINE__
1148
+ return @token
1149
+ end
1150
+ end
1151
+ else
1152
+ # scan_ident
1153
+ end
1154
+ else
1155
+ unless ident_part?(current_char)
1156
+ @token.type = :UNDERSCORE
1157
+ return @token
1158
+ end
1159
+ end
1160
+
1161
+ scan_ident(start)
1162
+ else
1163
+ if current_char.ascii_uppercase?
1164
+ start = current_pos
1165
+ while ident_part?(next_char)
1166
+ # Nothing to do
1167
+ end
1168
+ @token.type = :CONST
1169
+ @token.value = string_range_from_pool(start)
1170
+ elsif current_char.ascii_lowercase? || current_char == '_' || current_char.ord > 0x9F
1171
+ next_char
1172
+ scan_ident(start)
1173
+ else
1174
+ unknown_token
1175
+ end
1176
+ end
1177
+
# Reached only by branches that did not return early; resets the regex flags
# unless the branch taken above cleared `reset_regex_flags`.
1178
+ if reset_regex_flags
1179
+ @wants_regex = true
1180
+ @slash_is_regex = false
1181
+ end
1182
+
1183
+ @token
1184
+ end
1185
+ end
1186
+ end