iv-phonic 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,22 +9,17 @@
9
9
  #include "uchar.h"
10
10
  #include "chars.h"
11
11
  #include "token.h"
12
- #include "source.h"
13
12
  #include "location.h"
14
13
  #include "noncopyable.h"
14
+ #include "keyword.h"
15
15
 
16
16
  namespace iv {
17
17
  namespace core {
18
18
 
19
- class Lexer: private Noncopyable<Lexer>::type {
19
+ template<typename Source>
20
+ class Lexer: private Noncopyable<Lexer<Source> >::type {
20
21
  public:
21
- enum LexType {
22
- kClear = 0,
23
- kIdentifyReservedWords = 1,
24
- kIgnoreReservedWords = 2,
25
- kIgnoreReservedWordsAndIdentifyGetterOrSetter = 4,
26
- kStrict = 8
27
- };
22
+
28
23
  enum State {
29
24
  NONE,
30
25
  ESCAPE,
@@ -33,9 +28,9 @@ class Lexer: private Noncopyable<Lexer>::type {
33
28
  OCTAL
34
29
  };
35
30
 
36
- explicit Lexer(BasicSource* src)
31
+ explicit Lexer(const Source* src)
37
32
  : source_(src),
38
- buffer8_(kInitialReadBufferCapacity),
33
+ buffer8_(),
39
34
  buffer16_(kInitialReadBufferCapacity),
40
35
  pos_(0),
41
36
  end_(source_->size()),
@@ -46,15 +41,16 @@ class Lexer: private Noncopyable<Lexer>::type {
46
41
  Initialize();
47
42
  }
48
43
 
49
- Token::Type Next(int type) {
50
- Token::Type token;
44
+ template<typename LexType>
45
+ typename Token::Type Next(bool strict) {
46
+ typename Token::Type token;
51
47
  has_line_terminator_before_next_ = false;
52
48
  do {
53
- location_.begin_position_ = pos();
54
49
  while (Chars::IsWhiteSpace(c_)) {
55
50
  // white space
56
51
  Advance();
57
52
  }
53
+ location_.set_begin_position(pos() - 1);
58
54
  switch (c_) {
59
55
  case '"':
60
56
  case '\'':
@@ -252,7 +248,12 @@ class Lexer: private Noncopyable<Lexer>::type {
252
248
  case '^':
253
249
  // ^
254
250
  Advance();
255
- token = Token::BIT_XOR;
251
+ if (c_ == '=') {
252
+ Advance();
253
+ token = Token::ASSIGN_BIT_XOR;
254
+ } else {
255
+ token = Token::BIT_XOR;
256
+ }
256
257
  break;
257
258
 
258
259
  case '.':
@@ -338,7 +339,7 @@ class Lexer: private Noncopyable<Lexer>::type {
338
339
 
339
340
  default:
340
341
  if (Chars::IsIdentifierStart(c_)) {
341
- token = ScanIdentifier(type);
342
+ token = ScanIdentifier<LexType>(strict);
342
343
  } else if (Chars::IsDecimalDigit(c_)) {
343
344
  token = ScanNumber(false);
344
345
  } else if (Chars::IsLineTerminator(c_)) {
@@ -354,7 +355,7 @@ class Lexer: private Noncopyable<Lexer>::type {
354
355
  break;
355
356
  }
356
357
  } while (token == Token::NOT_FOUND);
357
- location_.end_position_ = pos();
358
+ location_.set_end_position(pos() - 1);
358
359
  return token;
359
360
  }
360
361
 
@@ -362,7 +363,7 @@ class Lexer: private Noncopyable<Lexer>::type {
362
363
  return buffer16_;
363
364
  }
364
365
 
365
- inline const std::vector<char>& Buffer8() const {
366
+ inline const std::string& Buffer8() const {
366
367
  return buffer8_;
367
368
  }
368
369
 
@@ -400,7 +401,7 @@ class Lexer: private Noncopyable<Lexer>::type {
400
401
  return pos_;
401
402
  }
402
403
 
403
- inline BasicSource* source() const {
404
+ inline const Source* source() const {
404
405
  return source_;
405
406
  }
406
407
 
@@ -408,7 +409,17 @@ class Lexer: private Noncopyable<Lexer>::type {
408
409
  return location_;
409
410
  }
410
411
 
412
+ inline std::size_t begin_position() const {
413
+ return location_.begin_position();
414
+ }
415
+
416
+ inline std::size_t end_position() const {
417
+ return location_.end_position();
418
+ }
419
+
411
420
  bool ScanRegExpLiteral(bool contains_eq) {
421
+ // location begin_position is the same with DIV
422
+ // so, no need to set
412
423
  bool character = false;
413
424
  buffer16_.clear();
414
425
  if (contains_eq) {
@@ -435,7 +446,7 @@ class Lexer: private Noncopyable<Lexer>::type {
435
446
  Record16Advance();
436
447
  }
437
448
  }
438
- Advance();
449
+ Advance(); // waste '/'
439
450
  return true;
440
451
  }
441
452
 
@@ -449,8 +460,9 @@ class Lexer: private Noncopyable<Lexer>::type {
449
460
  return false;
450
461
  }
451
462
  Advance();
452
- uc = ScanHexEscape('u', 4);
453
- if (uc == '\\') {
463
+ bool ng = false;
464
+ uc = ScanHexEscape('u', 4, &ng);
465
+ if (ng || uc == '\\') {
454
466
  return false;
455
467
  }
456
468
  Record16(uc);
@@ -458,6 +470,7 @@ class Lexer: private Noncopyable<Lexer>::type {
458
470
  Record16Advance();
459
471
  }
460
472
  }
473
+ location_.set_end_position(pos() - 1);
461
474
  return true;
462
475
  }
463
476
 
@@ -501,33 +514,6 @@ class Lexer: private Noncopyable<Lexer>::type {
501
514
  }
502
515
  }
503
516
 
504
- inline Token::Type IsMatch(char const * keyword,
505
- std::size_t len,
506
- Token::Type guess, bool strict) const {
507
- if (!strict) {
508
- return Token::IDENTIFIER;
509
- }
510
- std::vector<uc16>::const_iterator it = buffer16_.begin();
511
- do {
512
- if (*it++ != *keyword++) {
513
- return Token::IDENTIFIER;
514
- }
515
- } while (--len);
516
- return guess;
517
- }
518
-
519
- inline Token::Type IsMatch(char const * keyword,
520
- std::size_t len,
521
- Token::Type guess) const {
522
- std::vector<uc16>::const_iterator it = buffer16_.begin();
523
- do {
524
- if (*it++ != *keyword++) {
525
- return Token::IDENTIFIER;
526
- }
527
- } while (--len);
528
- return guess;
529
- }
530
-
531
517
  Token::Type SkipSingleLineComment() {
532
518
  Advance();
533
519
  // see ECMA-262 section 7.4
@@ -582,8 +568,8 @@ class Lexer: private Noncopyable<Lexer>::type {
582
568
  return Token::NOT_FOUND;
583
569
  }
584
570
 
585
- Token::Type ScanIdentifier(int type) {
586
- Token::Type token = Token::IDENTIFIER;
571
+ template<typename LexType>
572
+ Token::Type ScanIdentifier(bool strict) {
587
573
  uc16 uc;
588
574
 
589
575
  buffer16_.clear();
@@ -594,8 +580,9 @@ class Lexer: private Noncopyable<Lexer>::type {
594
580
  return Token::ILLEGAL;
595
581
  }
596
582
  Advance();
597
- uc = ScanHexEscape('u', 4);
598
- if (uc == '\\' || !Chars::IsIdentifierStart(uc)) {
583
+ bool ng = false;
584
+ uc = ScanHexEscape('u', 4, &ng);
585
+ if (ng || uc == '\\' || !Chars::IsIdentifierStart(uc)) {
599
586
  return Token::ILLEGAL;
600
587
  }
601
588
  Record16(uc);
@@ -610,8 +597,9 @@ class Lexer: private Noncopyable<Lexer>::type {
610
597
  return Token::ILLEGAL;
611
598
  }
612
599
  Advance();
613
- uc = ScanHexEscape('u', 4);
614
- if (uc == '\\' || !Chars::IsIdentifierPart(uc)) {
600
+ bool ng = false;
601
+ uc = ScanHexEscape('u', 4, &ng);
602
+ if (ng || uc == '\\' || !Chars::IsIdentifierPart(uc)) {
615
603
  return Token::ILLEGAL;
616
604
  }
617
605
  Record16(uc);
@@ -620,402 +608,7 @@ class Lexer: private Noncopyable<Lexer>::type {
620
608
  }
621
609
  }
622
610
 
623
- if (type & kIdentifyReservedWords) {
624
- token = DetectKeyword(type & kStrict);
625
- } else if (type & kIgnoreReservedWordsAndIdentifyGetterOrSetter) {
626
- token = DetectGetOrSet();
627
- }
628
-
629
- return token;
630
- }
631
-
632
- // detect which Identifier is Keyword, FutureReservedWord or not
633
- // Keyword and FutureReservedWord are defined in ECMA-262 5th.
634
- //
635
- // Some words such as :
636
- // int, short, boolean, byte, long, char, float, double, abstract, volatile,
637
- // transient, final, throws, goto, native, synchronized
638
- // were defined as FutureReservedWord in ECMA-262 3rd, but not in 5th.
639
- // So, DetectKeyword interprets them as Identifier.
640
- Token::Type DetectKeyword(bool strict) const {
641
- const std::size_t len = buffer16_.size();
642
- Token::Type token = Token::IDENTIFIER;
643
- switch (len) {
644
- case 2:
645
- // if in do
646
- if (buffer16_[0] == 'i') {
647
- if (buffer16_[1] == 'f') {
648
- token = Token::IF;
649
- } else if (buffer16_[1] == 'n') {
650
- token = Token::IN;
651
- }
652
- } else if (buffer16_[0] == 'd' && buffer16_[1] == 'o') {
653
- // do
654
- token = Token::DO;
655
- }
656
- break;
657
- case 3:
658
- // for var int new try let
659
- switch (buffer16_[2]) {
660
- case 't':
661
- if (buffer16_[0] == 'l' && buffer16_[1] == 'e' && strict) {
662
- // let
663
- token = Token::LET;
664
- } else if (buffer16_[0] == 'i' && buffer16_[1] == 'n') {
665
- // int (removed)
666
- // token = Token::INT;
667
- }
668
- break;
669
- case 'r':
670
- // for var
671
- if (buffer16_[0] == 'f' && buffer16_[1] == 'o') {
672
- // for
673
- token = Token::FOR;
674
- } else if (buffer16_[0] == 'v' && buffer16_[1] == 'a') {
675
- // var
676
- token = Token::VAR;
677
- }
678
- break;
679
- case 'y':
680
- // try
681
- if (buffer16_[0] == 't' && buffer16_[1] == 'r') {
682
- token = Token::TRY;
683
- }
684
- break;
685
- case 'w':
686
- // new
687
- if (buffer16_[0] == 'n' && buffer16_[1] == 'e') {
688
- token = Token::NEW;
689
- }
690
- break;
691
- }
692
- break;
693
- case 4:
694
- // else case true byte null this
695
- // void with long enum char goto
696
- // number 3 character is most duplicated
697
- switch (buffer16_[3]) {
698
- case 'e':
699
- // else case true byte
700
- if (buffer16_[2] == 's') {
701
- if (buffer16_[0] == 'e' && buffer16_[1] == 'l') {
702
- // else
703
- token = Token::ELSE;
704
- } else if (buffer16_[0] == 'c' && buffer16_[1] == 'a') {
705
- // case
706
- token = Token::CASE;
707
- }
708
- } else if (buffer16_[0] == 't' &&
709
- buffer16_[1] == 'r' && buffer16_[2] == 'u') {
710
- // true
711
- token = Token::TRUE_LITERAL;
712
- } else if (buffer16_[0] == 'b' &&
713
- buffer16_[1] == 'y' && buffer16_[2] == 't') {
714
- // byte (removed)
715
- // token = Token::BYTE;
716
- }
717
- break;
718
- case 'l':
719
- // null
720
- if (buffer16_[0] == 'n' &&
721
- buffer16_[1] == 'u' && buffer16_[2] == 'l') {
722
- token = Token::NULL_LITERAL;
723
- }
724
- break;
725
- case 's':
726
- // this
727
- if (buffer16_[0] == 't' &&
728
- buffer16_[1] == 'h' && buffer16_[2] == 'i') {
729
- token = Token::THIS;
730
- }
731
- break;
732
- case 'd':
733
- // void
734
- if (buffer16_[0] == 'v' &&
735
- buffer16_[1] == 'o' && buffer16_[2] == 'i') {
736
- token = Token::VOID;
737
- }
738
- break;
739
- case 'h':
740
- // with
741
- if (buffer16_[0] == 'w' &&
742
- buffer16_[1] == 'i' && buffer16_[2] == 't') {
743
- token = Token::WITH;
744
- }
745
- break;
746
- case 'g':
747
- // long (removed)
748
- if (buffer16_[0] == 'l' &&
749
- buffer16_[1] == 'o' && buffer16_[2] == 'n') {
750
- // token = Token::LONG;
751
- }
752
- break;
753
- case 'm':
754
- // enum
755
- if (buffer16_[0] == 'e' &&
756
- buffer16_[1] == 'n' && buffer16_[2] == 'u') {
757
- token = Token::ENUM;
758
- }
759
- break;
760
- case 'r':
761
- // char (removed)
762
- if (buffer16_[0] == 'c' &&
763
- buffer16_[1] == 'h' && buffer16_[2] == 'a') {
764
- // token = Token::CHAR;
765
- }
766
- break;
767
- case 'o':
768
- // goto (removed)
769
- if (buffer16_[0] == 'g' &&
770
- buffer16_[1] == 'o' && buffer16_[2] == 't') {
771
- // token = Token::GOTO;
772
- }
773
- break;
774
- }
775
- break;
776
- case 5:
777
- // break final float catch super while
778
- // throw short class const false yield
779
- // number 3 character is most duplicated
780
- switch (buffer16_[3]) {
781
- case 'a':
782
- // break final float
783
- if (buffer16_[0] == 'b' && buffer16_[1] == 'r' &&
784
- buffer16_[2] == 'e' && buffer16_[4] == 'k') {
785
- // break
786
- token = Token::BREAK;
787
- } else if (buffer16_[0] == 'f') {
788
- if (buffer16_[1] == 'i' &&
789
- buffer16_[2] == 'n' && buffer16_[4] == 'l') {
790
- // final (removed)
791
- // token = Token::FINAL;
792
- } else if (buffer16_[1] == 'l' &&
793
- buffer16_[2] == 'o' && buffer16_[4] == 't') {
794
- // float (removed)
795
- // token = Token::FLOAT;
796
- }
797
- }
798
- break;
799
- case 'c':
800
- if (buffer16_[0] == 'c' && buffer16_[1] == 'a' &&
801
- buffer16_[2] == 't' && buffer16_[4] == 'h') {
802
- // catch
803
- token = Token::CATCH;
804
- }
805
- break;
806
- case 'e':
807
- if (buffer16_[0] == 's' && buffer16_[1] == 'u' &&
808
- buffer16_[2] == 'p' && buffer16_[4] == 'r') {
809
- // super
810
- token = Token::SUPER;
811
- }
812
- break;
813
- case 'l':
814
- if (buffer16_[0] == 'w' && buffer16_[1] == 'h' &&
815
- buffer16_[2] == 'i' && buffer16_[4] == 'e') {
816
- // while
817
- token = Token::WHILE;
818
- } else if (strict &&
819
- buffer16_[0] == 'y' && buffer16_[1] == 'i' &&
820
- buffer16_[2] == 'e' && buffer16_[4] == 'd') {
821
- // yield
822
- token = Token::YIELD;
823
- }
824
- break;
825
- case 'o':
826
- if (buffer16_[0] == 't' && buffer16_[1] == 'h' &&
827
- buffer16_[2] == 'r' && buffer16_[4] == 'w') {
828
- // throw
829
- token = Token::THROW;
830
- }
831
- break;
832
- case 'r':
833
- if (buffer16_[0] == 's' && buffer16_[1] == 'h' &&
834
- buffer16_[2] == 'o' && buffer16_[4] == 't') {
835
- // short (removed)
836
- // token = Token::SHORT;
837
- }
838
- break;
839
- case 's':
840
- // class const false
841
- if (buffer16_[0] == 'c') {
842
- if (buffer16_[1] == 'l' &&
843
- buffer16_[2] == 'a' && buffer16_[4] == 's') {
844
- // class
845
- token = Token::CLASS;
846
- } else if (buffer16_[1] == 'o' &&
847
- buffer16_[2] == 'n' && buffer16_[4] == 't') {
848
- // const
849
- token = Token::CONST;
850
- }
851
- } else if (buffer16_[0] == 'f' && buffer16_[1] == 'a' &&
852
- buffer16_[2] == 'l' && buffer16_[4] == 'e') {
853
- // false
854
- token = Token::FALSE_LITERAL;
855
- }
856
- break;
857
- }
858
- break;
859
- case 6:
860
- // double delete export import native
861
- // public return static switch typeof throws
862
- // number 0 character is most duplicated
863
- switch (buffer16_[0]) {
864
- case 'd':
865
- // double delete
866
- if (buffer16_[5] == 'e' &&
867
- buffer16_[4] == 'l' && buffer16_[3] == 'b' &&
868
- buffer16_[2] == 'u' && buffer16_[1] == 'o') {
869
- // double
870
- // token = Token::DOUBLE;
871
- } else if (buffer16_[5] == 'e' &&
872
- buffer16_[4] == 't' && buffer16_[3] == 'e' &&
873
- buffer16_[2] == 'l' && buffer16_[1] == 'e') {
874
- // delete
875
- token = Token::DELETE;
876
- }
877
- break;
878
- case 'e':
879
- // export
880
- token = IsMatch("export", len, Token::EXPORT);
881
- break;
882
- case 'i':
883
- // import
884
- token = IsMatch("import", len, Token::IMPORT);
885
- break;
886
- case 'n':
887
- // native (removed)
888
- // token = IsMatch("native", len, Token::NATIVE);
889
- break;
890
- case 'p':
891
- // public
892
- token = IsMatch("public", len, Token::PUBLIC, strict);
893
- break;
894
- case 'r':
895
- // return
896
- token = IsMatch("return", len, Token::RETURN);
897
- break;
898
- case 's':
899
- // switch static
900
- if (buffer16_[1] == 'w' &&
901
- buffer16_[2] == 'i' && buffer16_[3] == 't' &&
902
- buffer16_[4] == 'c' && buffer16_[5] == 'h') {
903
- // switch
904
- token = Token::SWITCH;
905
- } else if (strict &&
906
- buffer16_[1] == 't' &&
907
- buffer16_[2] == 'a' && buffer16_[3] == 't' &&
908
- buffer16_[4] == 'i' && buffer16_[5] == 'c') {
909
- // static
910
- token = Token::STATIC;
911
- }
912
- break;
913
- case 't':
914
- // typeof throws
915
- if (buffer16_[5] == 'f' &&
916
- buffer16_[4] == 'o' && buffer16_[3] == 'e' &&
917
- buffer16_[2] == 'p' && buffer16_[1] == 'y') {
918
- // typeof
919
- token = Token::TYPEOF;
920
- } else if (buffer16_[5] == 's' &&
921
- buffer16_[4] == 'w' && buffer16_[3] == 'o' &&
922
- buffer16_[2] == 'r' && buffer16_[1] == 'h') {
923
- // throws (removed)
924
- // token = Token::THROWS;
925
- }
926
- break;
927
- }
928
- break;
929
- case 7:
930
- // boolean default extends finally package private
931
- // number 0 character is most duplicated
932
- switch (buffer16_[0]) {
933
- case 'b':
934
- // boolean (removed)
935
- // token = IsMatch("boolean", len, Token::BOOLEAN);
936
- break;
937
- case 'd':
938
- token = IsMatch("default", len, Token::DEFAULT);
939
- break;
940
- case 'e':
941
- token = IsMatch("extends", len, Token::EXTENDS);
942
- break;
943
- case 'f':
944
- token = IsMatch("finally", len, Token::FINALLY);
945
- break;
946
- case 'p':
947
- if (buffer16_[1] == 'a') {
948
- token = IsMatch("package", len, Token::PACKAGE, strict);
949
- } else if (buffer16_[1] == 'r') {
950
- token = IsMatch("private", len, Token::PRIVATE, strict);
951
- }
952
- break;
953
- }
954
- break;
955
- case 8:
956
- // debugger continue abstract volatile function
957
- // number 4 character is most duplicated
958
- switch (buffer16_[4]) {
959
- case 'g':
960
- token = IsMatch("debugger", len, Token::DEBUGGER);
961
- break;
962
- case 'i':
963
- token = IsMatch("continue", len, Token::CONTINUE);
964
- break;
965
- case 'r':
966
- // abstract (removed)
967
- // token = IsMatch("abstract", len, Token::ABSTRACT);
968
- break;
969
- case 't':
970
- if (buffer16_[1] == 'o') {
971
- // token = IsMatch("volatile", len, Token::VOLATILE);
972
- } else if (buffer16_[1] == 'u') {
973
- token = IsMatch("function", len, Token::FUNCTION);
974
- }
975
- break;
976
- }
977
- break;
978
- case 9:
979
- // interface protected transient
980
- if (buffer16_[1] == 'n') {
981
- token = IsMatch("interface", len, Token::INTERFACE, strict);
982
- } else if (buffer16_[1] == 'r') {
983
- if (buffer16_[0] == 'p') {
984
- token = IsMatch("protected", len, Token::PROTECTED, strict);
985
- } else if (buffer16_[0] == 't') {
986
- // transient (removed)
987
- // token = IsMatch("transient", len, Token::TRANSIENT);
988
- }
989
- }
990
- break;
991
- case 10:
992
- // instanceof implements
993
- if (buffer16_[1] == 'n') {
994
- token = IsMatch("instanceof", len, Token::INSTANCEOF);
995
- } else if (buffer16_[1] == 'm') {
996
- token = IsMatch("implements", len, Token::IMPLEMENTS, strict);
997
- }
998
- break;
999
- case 12:
1000
- // synchronized (removed)
1001
- // token = IsMatch("synchronized", len, Token::SYNCHRONIZED);
1002
- token = Token::IDENTIFIER;
1003
- break;
1004
- }
1005
- return token;
1006
- }
1007
-
1008
- Token::Type DetectGetOrSet() const {
1009
- if (buffer16_.size() == 3) {
1010
- if (buffer16_[1] == 'e' && buffer16_[2] == 't') {
1011
- if (buffer16_[0] == 'g') {
1012
- return Token::GET;
1013
- } else if (buffer16_[0] == 's') {
1014
- return Token::SET;
1015
- }
1016
- }
1017
- }
1018
- return Token::IDENTIFIER;
611
+ return detail::Keyword<LexType>::Detect(buffer16_, strict);
1019
612
  }
1020
613
 
1021
614
  Token::Type ScanString() {
@@ -1031,7 +624,9 @@ class Lexer: private Noncopyable<Lexer>::type {
1031
624
  if (type_ == NONE) {
1032
625
  type_ = ESCAPE;
1033
626
  }
1034
- ScanEscape();
627
+ if (!ScanEscape()) {
628
+ return Token::ILLEGAL;
629
+ }
1035
630
  } else {
1036
631
  Record16Advance();
1037
632
  }
@@ -1045,10 +640,10 @@ class Lexer: private Noncopyable<Lexer>::type {
1045
640
  return Token::STRING;
1046
641
  }
1047
642
 
1048
- void ScanEscape() {
643
+ bool ScanEscape() {
1049
644
  if (Chars::IsLineTerminator(c_)) {
1050
645
  SkipLineTerminator();
1051
- return;
646
+ return true;
1052
647
  }
1053
648
  switch (c_) {
1054
649
  case '\'':
@@ -1076,18 +671,30 @@ class Lexer: private Noncopyable<Lexer>::type {
1076
671
  Record16('\t');
1077
672
  Advance();
1078
673
  break;
1079
- case 'u' :
674
+ case 'u' : {
1080
675
  Advance();
1081
- Record16(ScanHexEscape('u', 4));
676
+ bool ng = false;
677
+ const uc16 uc = ScanHexEscape('u', 4, &ng);
678
+ if (ng) {
679
+ return false;
680
+ }
681
+ Record16(uc);
1082
682
  break;
683
+ }
1083
684
  case 'v' :
1084
685
  Record16('\v');
1085
686
  Advance();
1086
687
  break;
1087
- case 'x' :
688
+ case 'x' : {
1088
689
  Advance();
1089
- Record16(ScanHexEscape('x', 2));
690
+ bool ng = false;
691
+ const uc16 uc = ScanHexEscape('x', 2, &ng);
692
+ if (ng) {
693
+ return false;
694
+ }
695
+ Record16(uc);
1090
696
  break;
697
+ }
1091
698
  case '0' :
1092
699
  case '1' :
1093
700
  case '2' :
@@ -1102,10 +709,16 @@ class Lexer: private Noncopyable<Lexer>::type {
1102
709
  Record16(ScanOctalEscape());
1103
710
  break;
1104
711
 
712
+ case '8' :
713
+ case '9' :
714
+ // section 7.8.4 and B1.2
715
+ return false;
716
+
1105
717
  default:
1106
718
  Record16Advance();
1107
719
  break;
1108
720
  }
721
+ return true;
1109
722
  }
1110
723
 
1111
724
  Token::Type ScanNumber(const bool period) {
@@ -1181,14 +794,13 @@ class Lexer: private Noncopyable<Lexer>::type {
1181
794
 
1182
795
  if (type == OCTAL) {
1183
796
  double val = 0;
1184
- for (std::vector<char>::const_iterator it = buffer8_.begin(),
797
+ for (std::string::const_iterator it = buffer8_.begin(),
1185
798
  last = buffer8_.end(); it != last; ++it) {
1186
799
  val = val * 8 + (*it - '0');
1187
800
  }
1188
801
  numeric_ = val;
1189
802
  } else {
1190
- Record8('\0'); // Null Terminated String
1191
- numeric_ = std::strtod(buffer8_.data(), NULL);
803
+ numeric_ = std::strtod(buffer8_.c_str(), NULL);
1192
804
  }
1193
805
  type_ = type;
1194
806
  return Token::NUMBER;
@@ -1211,7 +823,7 @@ class Lexer: private Noncopyable<Lexer>::type {
1211
823
  return res;
1212
824
  }
1213
825
 
1214
- uc16 ScanHexEscape(uc16 c, int len) {
826
+ uc16 ScanHexEscape(uc16 c, int len, bool* ng) {
1215
827
  uc16 res = 0;
1216
828
  for (int i = 0; i < len; ++i) {
1217
829
  const int d = HexValue(c_);
@@ -1219,6 +831,7 @@ class Lexer: private Noncopyable<Lexer>::type {
1219
831
  for (int j = i - 1; j >= 0; --j) {
1220
832
  PushBack();
1221
833
  }
834
+ *ng = true;
1222
835
  return c;
1223
836
  }
1224
837
  res = res * 16 + d;
@@ -1262,8 +875,8 @@ class Lexer: private Noncopyable<Lexer>::type {
1262
875
  ++line_number_;
1263
876
  }
1264
877
 
1265
- BasicSource* source_;
1266
- std::vector<char> buffer8_;
878
+ const Source* source_;
879
+ std::string buffer8_;
1267
880
  std::vector<uc16> buffer16_;
1268
881
  double numeric_;
1269
882
  State type_;