iv-phonic 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +2 -3
- data/Rakefile +1 -1
- data/ext/include/iv/alloc.h +11 -4
- data/ext/include/iv/ast-factory.h +15 -6
- data/ext/include/iv/ast-fwd.h +2 -0
- data/ext/include/iv/ast-info.h +21 -0
- data/ext/include/iv/ast-serializer.h +6 -0
- data/ext/include/iv/ast-visitor.h +1 -0
- data/ext/include/iv/ast.h +522 -105
- data/ext/include/iv/conversions.h +79 -11
- data/ext/include/iv/keyword.h +460 -0
- data/ext/include/iv/lexer.h +77 -464
- data/ext/include/iv/location.h +6 -0
- data/ext/include/iv/parser.h +100 -43
- data/ext/include/iv/token.h +2 -0
- data/ext/iv/phonic/ast-fwd.h +17 -1
- data/ext/iv/phonic/creator.h +8 -0
- data/ext/iv/phonic/factory.h +10 -1
- data/ext/iv/phonic/parser.h +2 -1
- data/ext/iv/phonic/rparser.h +3 -4
- data/ext/iv/phonic/source.h +4 -5
- metadata +5 -6
- data/ext/include/iv/source.h +0 -27
- data/ext/iv/phonic/ast.h +0 -10
- data/ext/iv/phonic/rnode.h +0 -15
data/ext/include/iv/lexer.h
CHANGED
@@ -9,22 +9,17 @@
|
|
9
9
|
#include "uchar.h"
|
10
10
|
#include "chars.h"
|
11
11
|
#include "token.h"
|
12
|
-
#include "source.h"
|
13
12
|
#include "location.h"
|
14
13
|
#include "noncopyable.h"
|
14
|
+
#include "keyword.h"
|
15
15
|
|
16
16
|
namespace iv {
|
17
17
|
namespace core {
|
18
18
|
|
19
|
-
|
19
|
+
template<typename Source>
|
20
|
+
class Lexer: private Noncopyable<Lexer<Source> >::type {
|
20
21
|
public:
|
21
|
-
|
22
|
-
kClear = 0,
|
23
|
-
kIdentifyReservedWords = 1,
|
24
|
-
kIgnoreReservedWords = 2,
|
25
|
-
kIgnoreReservedWordsAndIdentifyGetterOrSetter = 4,
|
26
|
-
kStrict = 8
|
27
|
-
};
|
22
|
+
|
28
23
|
enum State {
|
29
24
|
NONE,
|
30
25
|
ESCAPE,
|
@@ -33,9 +28,9 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
33
28
|
OCTAL
|
34
29
|
};
|
35
30
|
|
36
|
-
explicit Lexer(
|
31
|
+
explicit Lexer(const Source* src)
|
37
32
|
: source_(src),
|
38
|
-
buffer8_(
|
33
|
+
buffer8_(),
|
39
34
|
buffer16_(kInitialReadBufferCapacity),
|
40
35
|
pos_(0),
|
41
36
|
end_(source_->size()),
|
@@ -46,15 +41,16 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
46
41
|
Initialize();
|
47
42
|
}
|
48
43
|
|
49
|
-
|
50
|
-
|
44
|
+
template<typename LexType>
|
45
|
+
typename Token::Type Next(bool strict) {
|
46
|
+
typename Token::Type token;
|
51
47
|
has_line_terminator_before_next_ = false;
|
52
48
|
do {
|
53
|
-
location_.begin_position_ = pos();
|
54
49
|
while (Chars::IsWhiteSpace(c_)) {
|
55
50
|
// white space
|
56
51
|
Advance();
|
57
52
|
}
|
53
|
+
location_.set_begin_position(pos() - 1);
|
58
54
|
switch (c_) {
|
59
55
|
case '"':
|
60
56
|
case '\'':
|
@@ -252,7 +248,12 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
252
248
|
case '^':
|
253
249
|
// ^
|
254
250
|
Advance();
|
255
|
-
|
251
|
+
if (c_ == '=') {
|
252
|
+
Advance();
|
253
|
+
token = Token::ASSIGN_BIT_XOR;
|
254
|
+
} else {
|
255
|
+
token = Token::BIT_XOR;
|
256
|
+
}
|
256
257
|
break;
|
257
258
|
|
258
259
|
case '.':
|
@@ -338,7 +339,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
338
339
|
|
339
340
|
default:
|
340
341
|
if (Chars::IsIdentifierStart(c_)) {
|
341
|
-
token = ScanIdentifier(
|
342
|
+
token = ScanIdentifier<LexType>(strict);
|
342
343
|
} else if (Chars::IsDecimalDigit(c_)) {
|
343
344
|
token = ScanNumber(false);
|
344
345
|
} else if (Chars::IsLineTerminator(c_)) {
|
@@ -354,7 +355,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
354
355
|
break;
|
355
356
|
}
|
356
357
|
} while (token == Token::NOT_FOUND);
|
357
|
-
location_.
|
358
|
+
location_.set_end_position(pos() - 1);
|
358
359
|
return token;
|
359
360
|
}
|
360
361
|
|
@@ -362,7 +363,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
362
363
|
return buffer16_;
|
363
364
|
}
|
364
365
|
|
365
|
-
inline const std::
|
366
|
+
inline const std::string& Buffer8() const {
|
366
367
|
return buffer8_;
|
367
368
|
}
|
368
369
|
|
@@ -400,7 +401,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
400
401
|
return pos_;
|
401
402
|
}
|
402
403
|
|
403
|
-
inline
|
404
|
+
inline const Source* source() const {
|
404
405
|
return source_;
|
405
406
|
}
|
406
407
|
|
@@ -408,7 +409,17 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
408
409
|
return location_;
|
409
410
|
}
|
410
411
|
|
412
|
+
inline std::size_t begin_position() const {
|
413
|
+
return location_.begin_position();
|
414
|
+
}
|
415
|
+
|
416
|
+
inline std::size_t end_position() const {
|
417
|
+
return location_.end_position();
|
418
|
+
}
|
419
|
+
|
411
420
|
bool ScanRegExpLiteral(bool contains_eq) {
|
421
|
+
// location begin_position is the same with DIV
|
422
|
+
// so, no need to set
|
412
423
|
bool character = false;
|
413
424
|
buffer16_.clear();
|
414
425
|
if (contains_eq) {
|
@@ -435,7 +446,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
435
446
|
Record16Advance();
|
436
447
|
}
|
437
448
|
}
|
438
|
-
Advance();
|
449
|
+
Advance(); // waste '/'
|
439
450
|
return true;
|
440
451
|
}
|
441
452
|
|
@@ -449,8 +460,9 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
449
460
|
return false;
|
450
461
|
}
|
451
462
|
Advance();
|
452
|
-
|
453
|
-
|
463
|
+
bool ng = false;
|
464
|
+
uc = ScanHexEscape('u', 4, &ng);
|
465
|
+
if (ng || uc == '\\') {
|
454
466
|
return false;
|
455
467
|
}
|
456
468
|
Record16(uc);
|
@@ -458,6 +470,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
458
470
|
Record16Advance();
|
459
471
|
}
|
460
472
|
}
|
473
|
+
location_.set_end_position(pos() - 1);
|
461
474
|
return true;
|
462
475
|
}
|
463
476
|
|
@@ -501,33 +514,6 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
501
514
|
}
|
502
515
|
}
|
503
516
|
|
504
|
-
inline Token::Type IsMatch(char const * keyword,
|
505
|
-
std::size_t len,
|
506
|
-
Token::Type guess, bool strict) const {
|
507
|
-
if (!strict) {
|
508
|
-
return Token::IDENTIFIER;
|
509
|
-
}
|
510
|
-
std::vector<uc16>::const_iterator it = buffer16_.begin();
|
511
|
-
do {
|
512
|
-
if (*it++ != *keyword++) {
|
513
|
-
return Token::IDENTIFIER;
|
514
|
-
}
|
515
|
-
} while (--len);
|
516
|
-
return guess;
|
517
|
-
}
|
518
|
-
|
519
|
-
inline Token::Type IsMatch(char const * keyword,
|
520
|
-
std::size_t len,
|
521
|
-
Token::Type guess) const {
|
522
|
-
std::vector<uc16>::const_iterator it = buffer16_.begin();
|
523
|
-
do {
|
524
|
-
if (*it++ != *keyword++) {
|
525
|
-
return Token::IDENTIFIER;
|
526
|
-
}
|
527
|
-
} while (--len);
|
528
|
-
return guess;
|
529
|
-
}
|
530
|
-
|
531
517
|
Token::Type SkipSingleLineComment() {
|
532
518
|
Advance();
|
533
519
|
// see ECMA-262 section 7.4
|
@@ -582,8 +568,8 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
582
568
|
return Token::NOT_FOUND;
|
583
569
|
}
|
584
570
|
|
585
|
-
|
586
|
-
|
571
|
+
template<typename LexType>
|
572
|
+
Token::Type ScanIdentifier(bool strict) {
|
587
573
|
uc16 uc;
|
588
574
|
|
589
575
|
buffer16_.clear();
|
@@ -594,8 +580,9 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
594
580
|
return Token::ILLEGAL;
|
595
581
|
}
|
596
582
|
Advance();
|
597
|
-
|
598
|
-
|
583
|
+
bool ng = false;
|
584
|
+
uc = ScanHexEscape('u', 4, &ng);
|
585
|
+
if (ng || uc == '\\' || !Chars::IsIdentifierStart(uc)) {
|
599
586
|
return Token::ILLEGAL;
|
600
587
|
}
|
601
588
|
Record16(uc);
|
@@ -610,8 +597,9 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
610
597
|
return Token::ILLEGAL;
|
611
598
|
}
|
612
599
|
Advance();
|
613
|
-
|
614
|
-
|
600
|
+
bool ng = false;
|
601
|
+
uc = ScanHexEscape('u', 4, &ng);
|
602
|
+
if (ng || uc == '\\' || !Chars::IsIdentifierPart(uc)) {
|
615
603
|
return Token::ILLEGAL;
|
616
604
|
}
|
617
605
|
Record16(uc);
|
@@ -620,402 +608,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
620
608
|
}
|
621
609
|
}
|
622
610
|
|
623
|
-
|
624
|
-
token = DetectKeyword(type & kStrict);
|
625
|
-
} else if (type & kIgnoreReservedWordsAndIdentifyGetterOrSetter) {
|
626
|
-
token = DetectGetOrSet();
|
627
|
-
}
|
628
|
-
|
629
|
-
return token;
|
630
|
-
}
|
631
|
-
|
632
|
-
// detect which Identifier is Keyword, FutureReservedWord or not
|
633
|
-
// Keyword and FutureReservedWord are defined in ECMA-262 5th.
|
634
|
-
//
|
635
|
-
// Some words such as :
|
636
|
-
// int, short, boolean, byte, long, char, float, double, abstract, volatile,
|
637
|
-
// transient, final, throws, goto, native, synchronized
|
638
|
-
// were defined as FutureReservedWord in ECMA-262 3rd, but not in 5th.
|
639
|
-
// So, DetectKeyword interprets them as Identifier.
|
640
|
-
Token::Type DetectKeyword(bool strict) const {
|
641
|
-
const std::size_t len = buffer16_.size();
|
642
|
-
Token::Type token = Token::IDENTIFIER;
|
643
|
-
switch (len) {
|
644
|
-
case 2:
|
645
|
-
// if in do
|
646
|
-
if (buffer16_[0] == 'i') {
|
647
|
-
if (buffer16_[1] == 'f') {
|
648
|
-
token = Token::IF;
|
649
|
-
} else if (buffer16_[1] == 'n') {
|
650
|
-
token = Token::IN;
|
651
|
-
}
|
652
|
-
} else if (buffer16_[0] == 'd' && buffer16_[1] == 'o') {
|
653
|
-
// do
|
654
|
-
token = Token::DO;
|
655
|
-
}
|
656
|
-
break;
|
657
|
-
case 3:
|
658
|
-
// for var int new try let
|
659
|
-
switch (buffer16_[2]) {
|
660
|
-
case 't':
|
661
|
-
if (buffer16_[0] == 'l' && buffer16_[1] == 'e' && strict) {
|
662
|
-
// let
|
663
|
-
token = Token::LET;
|
664
|
-
} else if (buffer16_[0] == 'i' && buffer16_[1] == 'n') {
|
665
|
-
// int (removed)
|
666
|
-
// token = Token::INT;
|
667
|
-
}
|
668
|
-
break;
|
669
|
-
case 'r':
|
670
|
-
// for var
|
671
|
-
if (buffer16_[0] == 'f' && buffer16_[1] == 'o') {
|
672
|
-
// for
|
673
|
-
token = Token::FOR;
|
674
|
-
} else if (buffer16_[0] == 'v' && buffer16_[1] == 'a') {
|
675
|
-
// var
|
676
|
-
token = Token::VAR;
|
677
|
-
}
|
678
|
-
break;
|
679
|
-
case 'y':
|
680
|
-
// try
|
681
|
-
if (buffer16_[0] == 't' && buffer16_[1] == 'r') {
|
682
|
-
token = Token::TRY;
|
683
|
-
}
|
684
|
-
break;
|
685
|
-
case 'w':
|
686
|
-
// new
|
687
|
-
if (buffer16_[0] == 'n' && buffer16_[1] == 'e') {
|
688
|
-
token = Token::NEW;
|
689
|
-
}
|
690
|
-
break;
|
691
|
-
}
|
692
|
-
break;
|
693
|
-
case 4:
|
694
|
-
// else case true byte null this
|
695
|
-
// void with long enum char goto
|
696
|
-
// number 3 character is most duplicated
|
697
|
-
switch (buffer16_[3]) {
|
698
|
-
case 'e':
|
699
|
-
// else case true byte
|
700
|
-
if (buffer16_[2] == 's') {
|
701
|
-
if (buffer16_[0] == 'e' && buffer16_[1] == 'l') {
|
702
|
-
// else
|
703
|
-
token = Token::ELSE;
|
704
|
-
} else if (buffer16_[0] == 'c' && buffer16_[1] == 'a') {
|
705
|
-
// case
|
706
|
-
token = Token::CASE;
|
707
|
-
}
|
708
|
-
} else if (buffer16_[0] == 't' &&
|
709
|
-
buffer16_[1] == 'r' && buffer16_[2] == 'u') {
|
710
|
-
// true
|
711
|
-
token = Token::TRUE_LITERAL;
|
712
|
-
} else if (buffer16_[0] == 'b' &&
|
713
|
-
buffer16_[1] == 'y' && buffer16_[2] == 't') {
|
714
|
-
// byte (removed)
|
715
|
-
// token = Token::BYTE;
|
716
|
-
}
|
717
|
-
break;
|
718
|
-
case 'l':
|
719
|
-
// null
|
720
|
-
if (buffer16_[0] == 'n' &&
|
721
|
-
buffer16_[1] == 'u' && buffer16_[2] == 'l') {
|
722
|
-
token = Token::NULL_LITERAL;
|
723
|
-
}
|
724
|
-
break;
|
725
|
-
case 's':
|
726
|
-
// this
|
727
|
-
if (buffer16_[0] == 't' &&
|
728
|
-
buffer16_[1] == 'h' && buffer16_[2] == 'i') {
|
729
|
-
token = Token::THIS;
|
730
|
-
}
|
731
|
-
break;
|
732
|
-
case 'd':
|
733
|
-
// void
|
734
|
-
if (buffer16_[0] == 'v' &&
|
735
|
-
buffer16_[1] == 'o' && buffer16_[2] == 'i') {
|
736
|
-
token = Token::VOID;
|
737
|
-
}
|
738
|
-
break;
|
739
|
-
case 'h':
|
740
|
-
// with
|
741
|
-
if (buffer16_[0] == 'w' &&
|
742
|
-
buffer16_[1] == 'i' && buffer16_[2] == 't') {
|
743
|
-
token = Token::WITH;
|
744
|
-
}
|
745
|
-
break;
|
746
|
-
case 'g':
|
747
|
-
// long (removed)
|
748
|
-
if (buffer16_[0] == 'l' &&
|
749
|
-
buffer16_[1] == 'o' && buffer16_[2] == 'n') {
|
750
|
-
// token = Token::LONG;
|
751
|
-
}
|
752
|
-
break;
|
753
|
-
case 'm':
|
754
|
-
// enum
|
755
|
-
if (buffer16_[0] == 'e' &&
|
756
|
-
buffer16_[1] == 'n' && buffer16_[2] == 'u') {
|
757
|
-
token = Token::ENUM;
|
758
|
-
}
|
759
|
-
break;
|
760
|
-
case 'r':
|
761
|
-
// char (removed)
|
762
|
-
if (buffer16_[0] == 'c' &&
|
763
|
-
buffer16_[1] == 'h' && buffer16_[2] == 'a') {
|
764
|
-
// token = Token::CHAR;
|
765
|
-
}
|
766
|
-
break;
|
767
|
-
case 'o':
|
768
|
-
// goto (removed)
|
769
|
-
if (buffer16_[0] == 'g' &&
|
770
|
-
buffer16_[1] == 'o' && buffer16_[2] == 't') {
|
771
|
-
// token = Token::GOTO;
|
772
|
-
}
|
773
|
-
break;
|
774
|
-
}
|
775
|
-
break;
|
776
|
-
case 5:
|
777
|
-
// break final float catch super while
|
778
|
-
// throw short class const false yield
|
779
|
-
// number 3 character is most duplicated
|
780
|
-
switch (buffer16_[3]) {
|
781
|
-
case 'a':
|
782
|
-
// break final float
|
783
|
-
if (buffer16_[0] == 'b' && buffer16_[1] == 'r' &&
|
784
|
-
buffer16_[2] == 'e' && buffer16_[4] == 'k') {
|
785
|
-
// break
|
786
|
-
token = Token::BREAK;
|
787
|
-
} else if (buffer16_[0] == 'f') {
|
788
|
-
if (buffer16_[1] == 'i' &&
|
789
|
-
buffer16_[2] == 'n' && buffer16_[4] == 'l') {
|
790
|
-
// final (removed)
|
791
|
-
// token = Token::FINAL;
|
792
|
-
} else if (buffer16_[1] == 'l' &&
|
793
|
-
buffer16_[2] == 'o' && buffer16_[4] == 't') {
|
794
|
-
// float (removed)
|
795
|
-
// token = Token::FLOAT;
|
796
|
-
}
|
797
|
-
}
|
798
|
-
break;
|
799
|
-
case 'c':
|
800
|
-
if (buffer16_[0] == 'c' && buffer16_[1] == 'a' &&
|
801
|
-
buffer16_[2] == 't' && buffer16_[4] == 'h') {
|
802
|
-
// catch
|
803
|
-
token = Token::CATCH;
|
804
|
-
}
|
805
|
-
break;
|
806
|
-
case 'e':
|
807
|
-
if (buffer16_[0] == 's' && buffer16_[1] == 'u' &&
|
808
|
-
buffer16_[2] == 'p' && buffer16_[4] == 'r') {
|
809
|
-
// super
|
810
|
-
token = Token::SUPER;
|
811
|
-
}
|
812
|
-
break;
|
813
|
-
case 'l':
|
814
|
-
if (buffer16_[0] == 'w' && buffer16_[1] == 'h' &&
|
815
|
-
buffer16_[2] == 'i' && buffer16_[4] == 'e') {
|
816
|
-
// while
|
817
|
-
token = Token::WHILE;
|
818
|
-
} else if (strict &&
|
819
|
-
buffer16_[0] == 'y' && buffer16_[1] == 'i' &&
|
820
|
-
buffer16_[2] == 'e' && buffer16_[4] == 'd') {
|
821
|
-
// yield
|
822
|
-
token = Token::YIELD;
|
823
|
-
}
|
824
|
-
break;
|
825
|
-
case 'o':
|
826
|
-
if (buffer16_[0] == 't' && buffer16_[1] == 'h' &&
|
827
|
-
buffer16_[2] == 'r' && buffer16_[4] == 'w') {
|
828
|
-
// throw
|
829
|
-
token = Token::THROW;
|
830
|
-
}
|
831
|
-
break;
|
832
|
-
case 'r':
|
833
|
-
if (buffer16_[0] == 's' && buffer16_[1] == 'h' &&
|
834
|
-
buffer16_[2] == 'o' && buffer16_[4] == 't') {
|
835
|
-
// short (removed)
|
836
|
-
// token = Token::SHORT;
|
837
|
-
}
|
838
|
-
break;
|
839
|
-
case 's':
|
840
|
-
// class const false
|
841
|
-
if (buffer16_[0] == 'c') {
|
842
|
-
if (buffer16_[1] == 'l' &&
|
843
|
-
buffer16_[2] == 'a' && buffer16_[4] == 's') {
|
844
|
-
// class
|
845
|
-
token = Token::CLASS;
|
846
|
-
} else if (buffer16_[1] == 'o' &&
|
847
|
-
buffer16_[2] == 'n' && buffer16_[4] == 't') {
|
848
|
-
// const
|
849
|
-
token = Token::CONST;
|
850
|
-
}
|
851
|
-
} else if (buffer16_[0] == 'f' && buffer16_[1] == 'a' &&
|
852
|
-
buffer16_[2] == 'l' && buffer16_[4] == 'e') {
|
853
|
-
// false
|
854
|
-
token = Token::FALSE_LITERAL;
|
855
|
-
}
|
856
|
-
break;
|
857
|
-
}
|
858
|
-
break;
|
859
|
-
case 6:
|
860
|
-
// double delete export import native
|
861
|
-
// public return static switch typeof throws
|
862
|
-
// number 0 character is most duplicated
|
863
|
-
switch (buffer16_[0]) {
|
864
|
-
case 'd':
|
865
|
-
// double delete
|
866
|
-
if (buffer16_[5] == 'e' &&
|
867
|
-
buffer16_[4] == 'l' && buffer16_[3] == 'b' &&
|
868
|
-
buffer16_[2] == 'u' && buffer16_[1] == 'o') {
|
869
|
-
// double
|
870
|
-
// token = Token::DOUBLE;
|
871
|
-
} else if (buffer16_[5] == 'e' &&
|
872
|
-
buffer16_[4] == 't' && buffer16_[3] == 'e' &&
|
873
|
-
buffer16_[2] == 'l' && buffer16_[1] == 'e') {
|
874
|
-
// delete
|
875
|
-
token = Token::DELETE;
|
876
|
-
}
|
877
|
-
break;
|
878
|
-
case 'e':
|
879
|
-
// export
|
880
|
-
token = IsMatch("export", len, Token::EXPORT);
|
881
|
-
break;
|
882
|
-
case 'i':
|
883
|
-
// import
|
884
|
-
token = IsMatch("import", len, Token::IMPORT);
|
885
|
-
break;
|
886
|
-
case 'n':
|
887
|
-
// native (removed)
|
888
|
-
// token = IsMatch("native", len, Token::NATIVE);
|
889
|
-
break;
|
890
|
-
case 'p':
|
891
|
-
// public
|
892
|
-
token = IsMatch("public", len, Token::PUBLIC, strict);
|
893
|
-
break;
|
894
|
-
case 'r':
|
895
|
-
// return
|
896
|
-
token = IsMatch("return", len, Token::RETURN);
|
897
|
-
break;
|
898
|
-
case 's':
|
899
|
-
// switch static
|
900
|
-
if (buffer16_[1] == 'w' &&
|
901
|
-
buffer16_[2] == 'i' && buffer16_[3] == 't' &&
|
902
|
-
buffer16_[4] == 'c' && buffer16_[5] == 'h') {
|
903
|
-
// switch
|
904
|
-
token = Token::SWITCH;
|
905
|
-
} else if (strict &&
|
906
|
-
buffer16_[1] == 't' &&
|
907
|
-
buffer16_[2] == 'a' && buffer16_[3] == 't' &&
|
908
|
-
buffer16_[4] == 'i' && buffer16_[5] == 'c') {
|
909
|
-
// static
|
910
|
-
token = Token::STATIC;
|
911
|
-
}
|
912
|
-
break;
|
913
|
-
case 't':
|
914
|
-
// typeof throws
|
915
|
-
if (buffer16_[5] == 'f' &&
|
916
|
-
buffer16_[4] == 'o' && buffer16_[3] == 'e' &&
|
917
|
-
buffer16_[2] == 'p' && buffer16_[1] == 'y') {
|
918
|
-
// typeof
|
919
|
-
token = Token::TYPEOF;
|
920
|
-
} else if (buffer16_[5] == 's' &&
|
921
|
-
buffer16_[4] == 'w' && buffer16_[3] == 'o' &&
|
922
|
-
buffer16_[2] == 'r' && buffer16_[1] == 'h') {
|
923
|
-
// throws (removed)
|
924
|
-
// token = Token::THROWS;
|
925
|
-
}
|
926
|
-
break;
|
927
|
-
}
|
928
|
-
break;
|
929
|
-
case 7:
|
930
|
-
// boolean default extends finally package private
|
931
|
-
// number 0 character is most duplicated
|
932
|
-
switch (buffer16_[0]) {
|
933
|
-
case 'b':
|
934
|
-
// boolean (removed)
|
935
|
-
// token = IsMatch("boolean", len, Token::BOOLEAN);
|
936
|
-
break;
|
937
|
-
case 'd':
|
938
|
-
token = IsMatch("default", len, Token::DEFAULT);
|
939
|
-
break;
|
940
|
-
case 'e':
|
941
|
-
token = IsMatch("extends", len, Token::EXTENDS);
|
942
|
-
break;
|
943
|
-
case 'f':
|
944
|
-
token = IsMatch("finally", len, Token::FINALLY);
|
945
|
-
break;
|
946
|
-
case 'p':
|
947
|
-
if (buffer16_[1] == 'a') {
|
948
|
-
token = IsMatch("package", len, Token::PACKAGE, strict);
|
949
|
-
} else if (buffer16_[1] == 'r') {
|
950
|
-
token = IsMatch("private", len, Token::PRIVATE, strict);
|
951
|
-
}
|
952
|
-
break;
|
953
|
-
}
|
954
|
-
break;
|
955
|
-
case 8:
|
956
|
-
// debugger continue abstract volatile function
|
957
|
-
// number 4 character is most duplicated
|
958
|
-
switch (buffer16_[4]) {
|
959
|
-
case 'g':
|
960
|
-
token = IsMatch("debugger", len, Token::DEBUGGER);
|
961
|
-
break;
|
962
|
-
case 'i':
|
963
|
-
token = IsMatch("continue", len, Token::CONTINUE);
|
964
|
-
break;
|
965
|
-
case 'r':
|
966
|
-
// abstract (removed)
|
967
|
-
// token = IsMatch("abstract", len, Token::ABSTRACT);
|
968
|
-
break;
|
969
|
-
case 't':
|
970
|
-
if (buffer16_[1] == 'o') {
|
971
|
-
// token = IsMatch("volatile", len, Token::VOLATILE);
|
972
|
-
} else if (buffer16_[1] == 'u') {
|
973
|
-
token = IsMatch("function", len, Token::FUNCTION);
|
974
|
-
}
|
975
|
-
break;
|
976
|
-
}
|
977
|
-
break;
|
978
|
-
case 9:
|
979
|
-
// interface protected transient
|
980
|
-
if (buffer16_[1] == 'n') {
|
981
|
-
token = IsMatch("interface", len, Token::INTERFACE, strict);
|
982
|
-
} else if (buffer16_[1] == 'r') {
|
983
|
-
if (buffer16_[0] == 'p') {
|
984
|
-
token = IsMatch("protected", len, Token::PROTECTED, strict);
|
985
|
-
} else if (buffer16_[0] == 't') {
|
986
|
-
// transient (removed)
|
987
|
-
// token = IsMatch("transient", len, Token::TRANSIENT);
|
988
|
-
}
|
989
|
-
}
|
990
|
-
break;
|
991
|
-
case 10:
|
992
|
-
// instanceof implements
|
993
|
-
if (buffer16_[1] == 'n') {
|
994
|
-
token = IsMatch("instanceof", len, Token::INSTANCEOF);
|
995
|
-
} else if (buffer16_[1] == 'm') {
|
996
|
-
token = IsMatch("implements", len, Token::IMPLEMENTS, strict);
|
997
|
-
}
|
998
|
-
break;
|
999
|
-
case 12:
|
1000
|
-
// synchronized (removed)
|
1001
|
-
// token = IsMatch("synchronized", len, Token::SYNCHRONIZED);
|
1002
|
-
token = Token::IDENTIFIER;
|
1003
|
-
break;
|
1004
|
-
}
|
1005
|
-
return token;
|
1006
|
-
}
|
1007
|
-
|
1008
|
-
Token::Type DetectGetOrSet() const {
|
1009
|
-
if (buffer16_.size() == 3) {
|
1010
|
-
if (buffer16_[1] == 'e' && buffer16_[2] == 't') {
|
1011
|
-
if (buffer16_[0] == 'g') {
|
1012
|
-
return Token::GET;
|
1013
|
-
} else if (buffer16_[0] == 's') {
|
1014
|
-
return Token::SET;
|
1015
|
-
}
|
1016
|
-
}
|
1017
|
-
}
|
1018
|
-
return Token::IDENTIFIER;
|
611
|
+
return detail::Keyword<LexType>::Detect(buffer16_, strict);
|
1019
612
|
}
|
1020
613
|
|
1021
614
|
Token::Type ScanString() {
|
@@ -1031,7 +624,9 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1031
624
|
if (type_ == NONE) {
|
1032
625
|
type_ = ESCAPE;
|
1033
626
|
}
|
1034
|
-
ScanEscape()
|
627
|
+
if (!ScanEscape()) {
|
628
|
+
return Token::ILLEGAL;
|
629
|
+
}
|
1035
630
|
} else {
|
1036
631
|
Record16Advance();
|
1037
632
|
}
|
@@ -1045,10 +640,10 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1045
640
|
return Token::STRING;
|
1046
641
|
}
|
1047
642
|
|
1048
|
-
|
643
|
+
bool ScanEscape() {
|
1049
644
|
if (Chars::IsLineTerminator(c_)) {
|
1050
645
|
SkipLineTerminator();
|
1051
|
-
return;
|
646
|
+
return true;
|
1052
647
|
}
|
1053
648
|
switch (c_) {
|
1054
649
|
case '\'':
|
@@ -1076,18 +671,30 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1076
671
|
Record16('\t');
|
1077
672
|
Advance();
|
1078
673
|
break;
|
1079
|
-
case 'u' :
|
674
|
+
case 'u' : {
|
1080
675
|
Advance();
|
1081
|
-
|
676
|
+
bool ng = false;
|
677
|
+
const uc16 uc = ScanHexEscape('u', 4, &ng);
|
678
|
+
if (ng) {
|
679
|
+
return false;
|
680
|
+
}
|
681
|
+
Record16(uc);
|
1082
682
|
break;
|
683
|
+
}
|
1083
684
|
case 'v' :
|
1084
685
|
Record16('\v');
|
1085
686
|
Advance();
|
1086
687
|
break;
|
1087
|
-
case 'x' :
|
688
|
+
case 'x' : {
|
1088
689
|
Advance();
|
1089
|
-
|
690
|
+
bool ng = false;
|
691
|
+
const uc16 uc = ScanHexEscape('x', 2, &ng);
|
692
|
+
if (ng) {
|
693
|
+
return false;
|
694
|
+
}
|
695
|
+
Record16(uc);
|
1090
696
|
break;
|
697
|
+
}
|
1091
698
|
case '0' :
|
1092
699
|
case '1' :
|
1093
700
|
case '2' :
|
@@ -1102,10 +709,16 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1102
709
|
Record16(ScanOctalEscape());
|
1103
710
|
break;
|
1104
711
|
|
712
|
+
case '8' :
|
713
|
+
case '9' :
|
714
|
+
// section 7.8.4 and B1.2
|
715
|
+
return false;
|
716
|
+
|
1105
717
|
default:
|
1106
718
|
Record16Advance();
|
1107
719
|
break;
|
1108
720
|
}
|
721
|
+
return true;
|
1109
722
|
}
|
1110
723
|
|
1111
724
|
Token::Type ScanNumber(const bool period) {
|
@@ -1181,14 +794,13 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1181
794
|
|
1182
795
|
if (type == OCTAL) {
|
1183
796
|
double val = 0;
|
1184
|
-
for (std::
|
797
|
+
for (std::string::const_iterator it = buffer8_.begin(),
|
1185
798
|
last = buffer8_.end(); it != last; ++it) {
|
1186
799
|
val = val * 8 + (*it - '0');
|
1187
800
|
}
|
1188
801
|
numeric_ = val;
|
1189
802
|
} else {
|
1190
|
-
|
1191
|
-
numeric_ = std::strtod(buffer8_.data(), NULL);
|
803
|
+
numeric_ = std::strtod(buffer8_.c_str(), NULL);
|
1192
804
|
}
|
1193
805
|
type_ = type;
|
1194
806
|
return Token::NUMBER;
|
@@ -1211,7 +823,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1211
823
|
return res;
|
1212
824
|
}
|
1213
825
|
|
1214
|
-
uc16 ScanHexEscape(uc16 c, int len) {
|
826
|
+
uc16 ScanHexEscape(uc16 c, int len, bool* ng) {
|
1215
827
|
uc16 res = 0;
|
1216
828
|
for (int i = 0; i < len; ++i) {
|
1217
829
|
const int d = HexValue(c_);
|
@@ -1219,6 +831,7 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1219
831
|
for (int j = i - 1; j >= 0; --j) {
|
1220
832
|
PushBack();
|
1221
833
|
}
|
834
|
+
*ng = true;
|
1222
835
|
return c;
|
1223
836
|
}
|
1224
837
|
res = res * 16 + d;
|
@@ -1262,8 +875,8 @@ class Lexer: private Noncopyable<Lexer>::type {
|
|
1262
875
|
++line_number_;
|
1263
876
|
}
|
1264
877
|
|
1265
|
-
|
1266
|
-
std::
|
878
|
+
const Source* source_;
|
879
|
+
std::string buffer8_;
|
1267
880
|
std::vector<uc16> buffer16_;
|
1268
881
|
double numeric_;
|
1269
882
|
State type_;
|