iv-phonic 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,22 +9,17 @@
9
9
  #include "uchar.h"
10
10
  #include "chars.h"
11
11
  #include "token.h"
12
- #include "source.h"
13
12
  #include "location.h"
14
13
  #include "noncopyable.h"
14
+ #include "keyword.h"
15
15
 
16
16
  namespace iv {
17
17
  namespace core {
18
18
 
19
- class Lexer: private Noncopyable<Lexer>::type {
19
+ template<typename Source>
20
+ class Lexer: private Noncopyable<Lexer<Source> >::type {
20
21
  public:
21
- enum LexType {
22
- kClear = 0,
23
- kIdentifyReservedWords = 1,
24
- kIgnoreReservedWords = 2,
25
- kIgnoreReservedWordsAndIdentifyGetterOrSetter = 4,
26
- kStrict = 8
27
- };
22
+
28
23
  enum State {
29
24
  NONE,
30
25
  ESCAPE,
@@ -33,9 +28,9 @@ class Lexer: private Noncopyable<Lexer>::type {
33
28
  OCTAL
34
29
  };
35
30
 
36
- explicit Lexer(BasicSource* src)
31
+ explicit Lexer(const Source* src)
37
32
  : source_(src),
38
- buffer8_(kInitialReadBufferCapacity),
33
+ buffer8_(),
39
34
  buffer16_(kInitialReadBufferCapacity),
40
35
  pos_(0),
41
36
  end_(source_->size()),
@@ -46,15 +41,16 @@ class Lexer: private Noncopyable<Lexer>::type {
46
41
  Initialize();
47
42
  }
48
43
 
49
- Token::Type Next(int type) {
50
- Token::Type token;
44
+ template<typename LexType>
45
+ typename Token::Type Next(bool strict) {
46
+ typename Token::Type token;
51
47
  has_line_terminator_before_next_ = false;
52
48
  do {
53
- location_.begin_position_ = pos();
54
49
  while (Chars::IsWhiteSpace(c_)) {
55
50
  // white space
56
51
  Advance();
57
52
  }
53
+ location_.set_begin_position(pos() - 1);
58
54
  switch (c_) {
59
55
  case '"':
60
56
  case '\'':
@@ -252,7 +248,12 @@ class Lexer: private Noncopyable<Lexer>::type {
252
248
  case '^':
253
249
  // ^
254
250
  Advance();
255
- token = Token::BIT_XOR;
251
+ if (c_ == '=') {
252
+ Advance();
253
+ token = Token::ASSIGN_BIT_XOR;
254
+ } else {
255
+ token = Token::BIT_XOR;
256
+ }
256
257
  break;
257
258
 
258
259
  case '.':
@@ -338,7 +339,7 @@ class Lexer: private Noncopyable<Lexer>::type {
338
339
 
339
340
  default:
340
341
  if (Chars::IsIdentifierStart(c_)) {
341
- token = ScanIdentifier(type);
342
+ token = ScanIdentifier<LexType>(strict);
342
343
  } else if (Chars::IsDecimalDigit(c_)) {
343
344
  token = ScanNumber(false);
344
345
  } else if (Chars::IsLineTerminator(c_)) {
@@ -354,7 +355,7 @@ class Lexer: private Noncopyable<Lexer>::type {
354
355
  break;
355
356
  }
356
357
  } while (token == Token::NOT_FOUND);
357
- location_.end_position_ = pos();
358
+ location_.set_end_position(pos() - 1);
358
359
  return token;
359
360
  }
360
361
 
@@ -362,7 +363,7 @@ class Lexer: private Noncopyable<Lexer>::type {
362
363
  return buffer16_;
363
364
  }
364
365
 
365
- inline const std::vector<char>& Buffer8() const {
366
+ inline const std::string& Buffer8() const {
366
367
  return buffer8_;
367
368
  }
368
369
 
@@ -400,7 +401,7 @@ class Lexer: private Noncopyable<Lexer>::type {
400
401
  return pos_;
401
402
  }
402
403
 
403
- inline BasicSource* source() const {
404
+ inline const Source* source() const {
404
405
  return source_;
405
406
  }
406
407
 
@@ -408,7 +409,17 @@ class Lexer: private Noncopyable<Lexer>::type {
408
409
  return location_;
409
410
  }
410
411
 
412
+ inline std::size_t begin_position() const {
413
+ return location_.begin_position();
414
+ }
415
+
416
+ inline std::size_t end_position() const {
417
+ return location_.end_position();
418
+ }
419
+
411
420
  bool ScanRegExpLiteral(bool contains_eq) {
421
+ // location begin_position is the same with DIV
422
+ // so, no need to set
412
423
  bool character = false;
413
424
  buffer16_.clear();
414
425
  if (contains_eq) {
@@ -435,7 +446,7 @@ class Lexer: private Noncopyable<Lexer>::type {
435
446
  Record16Advance();
436
447
  }
437
448
  }
438
- Advance();
449
+ Advance(); // waste '/'
439
450
  return true;
440
451
  }
441
452
 
@@ -449,8 +460,9 @@ class Lexer: private Noncopyable<Lexer>::type {
449
460
  return false;
450
461
  }
451
462
  Advance();
452
- uc = ScanHexEscape('u', 4);
453
- if (uc == '\\') {
463
+ bool ng = false;
464
+ uc = ScanHexEscape('u', 4, &ng);
465
+ if (ng || uc == '\\') {
454
466
  return false;
455
467
  }
456
468
  Record16(uc);
@@ -458,6 +470,7 @@ class Lexer: private Noncopyable<Lexer>::type {
458
470
  Record16Advance();
459
471
  }
460
472
  }
473
+ location_.set_end_position(pos() - 1);
461
474
  return true;
462
475
  }
463
476
 
@@ -501,33 +514,6 @@ class Lexer: private Noncopyable<Lexer>::type {
501
514
  }
502
515
  }
503
516
 
504
- inline Token::Type IsMatch(char const * keyword,
505
- std::size_t len,
506
- Token::Type guess, bool strict) const {
507
- if (!strict) {
508
- return Token::IDENTIFIER;
509
- }
510
- std::vector<uc16>::const_iterator it = buffer16_.begin();
511
- do {
512
- if (*it++ != *keyword++) {
513
- return Token::IDENTIFIER;
514
- }
515
- } while (--len);
516
- return guess;
517
- }
518
-
519
- inline Token::Type IsMatch(char const * keyword,
520
- std::size_t len,
521
- Token::Type guess) const {
522
- std::vector<uc16>::const_iterator it = buffer16_.begin();
523
- do {
524
- if (*it++ != *keyword++) {
525
- return Token::IDENTIFIER;
526
- }
527
- } while (--len);
528
- return guess;
529
- }
530
-
531
517
  Token::Type SkipSingleLineComment() {
532
518
  Advance();
533
519
  // see ECMA-262 section 7.4
@@ -582,8 +568,8 @@ class Lexer: private Noncopyable<Lexer>::type {
582
568
  return Token::NOT_FOUND;
583
569
  }
584
570
 
585
- Token::Type ScanIdentifier(int type) {
586
- Token::Type token = Token::IDENTIFIER;
571
+ template<typename LexType>
572
+ Token::Type ScanIdentifier(bool strict) {
587
573
  uc16 uc;
588
574
 
589
575
  buffer16_.clear();
@@ -594,8 +580,9 @@ class Lexer: private Noncopyable<Lexer>::type {
594
580
  return Token::ILLEGAL;
595
581
  }
596
582
  Advance();
597
- uc = ScanHexEscape('u', 4);
598
- if (uc == '\\' || !Chars::IsIdentifierStart(uc)) {
583
+ bool ng = false;
584
+ uc = ScanHexEscape('u', 4, &ng);
585
+ if (ng || uc == '\\' || !Chars::IsIdentifierStart(uc)) {
599
586
  return Token::ILLEGAL;
600
587
  }
601
588
  Record16(uc);
@@ -610,8 +597,9 @@ class Lexer: private Noncopyable<Lexer>::type {
610
597
  return Token::ILLEGAL;
611
598
  }
612
599
  Advance();
613
- uc = ScanHexEscape('u', 4);
614
- if (uc == '\\' || !Chars::IsIdentifierPart(uc)) {
600
+ bool ng = false;
601
+ uc = ScanHexEscape('u', 4, &ng);
602
+ if (ng || uc == '\\' || !Chars::IsIdentifierPart(uc)) {
615
603
  return Token::ILLEGAL;
616
604
  }
617
605
  Record16(uc);
@@ -620,402 +608,7 @@ class Lexer: private Noncopyable<Lexer>::type {
620
608
  }
621
609
  }
622
610
 
623
- if (type & kIdentifyReservedWords) {
624
- token = DetectKeyword(type & kStrict);
625
- } else if (type & kIgnoreReservedWordsAndIdentifyGetterOrSetter) {
626
- token = DetectGetOrSet();
627
- }
628
-
629
- return token;
630
- }
631
-
632
- // detect which Identifier is Keyword, FutureReservedWord or not
633
- // Keyword and FutureReservedWord are defined in ECMA-262 5th.
634
- //
635
- // Some words such as :
636
- // int, short, boolean, byte, long, char, float, double, abstract, volatile,
637
- // transient, final, throws, goto, native, synchronized
638
- // were defined as FutureReservedWord in ECMA-262 3rd, but not in 5th.
639
- // So, DetectKeyword interprets them as Identifier.
640
- Token::Type DetectKeyword(bool strict) const {
641
- const std::size_t len = buffer16_.size();
642
- Token::Type token = Token::IDENTIFIER;
643
- switch (len) {
644
- case 2:
645
- // if in do
646
- if (buffer16_[0] == 'i') {
647
- if (buffer16_[1] == 'f') {
648
- token = Token::IF;
649
- } else if (buffer16_[1] == 'n') {
650
- token = Token::IN;
651
- }
652
- } else if (buffer16_[0] == 'd' && buffer16_[1] == 'o') {
653
- // do
654
- token = Token::DO;
655
- }
656
- break;
657
- case 3:
658
- // for var int new try let
659
- switch (buffer16_[2]) {
660
- case 't':
661
- if (buffer16_[0] == 'l' && buffer16_[1] == 'e' && strict) {
662
- // let
663
- token = Token::LET;
664
- } else if (buffer16_[0] == 'i' && buffer16_[1] == 'n') {
665
- // int (removed)
666
- // token = Token::INT;
667
- }
668
- break;
669
- case 'r':
670
- // for var
671
- if (buffer16_[0] == 'f' && buffer16_[1] == 'o') {
672
- // for
673
- token = Token::FOR;
674
- } else if (buffer16_[0] == 'v' && buffer16_[1] == 'a') {
675
- // var
676
- token = Token::VAR;
677
- }
678
- break;
679
- case 'y':
680
- // try
681
- if (buffer16_[0] == 't' && buffer16_[1] == 'r') {
682
- token = Token::TRY;
683
- }
684
- break;
685
- case 'w':
686
- // new
687
- if (buffer16_[0] == 'n' && buffer16_[1] == 'e') {
688
- token = Token::NEW;
689
- }
690
- break;
691
- }
692
- break;
693
- case 4:
694
- // else case true byte null this
695
- // void with long enum char goto
696
- // number 3 character is most duplicated
697
- switch (buffer16_[3]) {
698
- case 'e':
699
- // else case true byte
700
- if (buffer16_[2] == 's') {
701
- if (buffer16_[0] == 'e' && buffer16_[1] == 'l') {
702
- // else
703
- token = Token::ELSE;
704
- } else if (buffer16_[0] == 'c' && buffer16_[1] == 'a') {
705
- // case
706
- token = Token::CASE;
707
- }
708
- } else if (buffer16_[0] == 't' &&
709
- buffer16_[1] == 'r' && buffer16_[2] == 'u') {
710
- // true
711
- token = Token::TRUE_LITERAL;
712
- } else if (buffer16_[0] == 'b' &&
713
- buffer16_[1] == 'y' && buffer16_[2] == 't') {
714
- // byte (removed)
715
- // token = Token::BYTE;
716
- }
717
- break;
718
- case 'l':
719
- // null
720
- if (buffer16_[0] == 'n' &&
721
- buffer16_[1] == 'u' && buffer16_[2] == 'l') {
722
- token = Token::NULL_LITERAL;
723
- }
724
- break;
725
- case 's':
726
- // this
727
- if (buffer16_[0] == 't' &&
728
- buffer16_[1] == 'h' && buffer16_[2] == 'i') {
729
- token = Token::THIS;
730
- }
731
- break;
732
- case 'd':
733
- // void
734
- if (buffer16_[0] == 'v' &&
735
- buffer16_[1] == 'o' && buffer16_[2] == 'i') {
736
- token = Token::VOID;
737
- }
738
- break;
739
- case 'h':
740
- // with
741
- if (buffer16_[0] == 'w' &&
742
- buffer16_[1] == 'i' && buffer16_[2] == 't') {
743
- token = Token::WITH;
744
- }
745
- break;
746
- case 'g':
747
- // long (removed)
748
- if (buffer16_[0] == 'l' &&
749
- buffer16_[1] == 'o' && buffer16_[2] == 'n') {
750
- // token = Token::LONG;
751
- }
752
- break;
753
- case 'm':
754
- // enum
755
- if (buffer16_[0] == 'e' &&
756
- buffer16_[1] == 'n' && buffer16_[2] == 'u') {
757
- token = Token::ENUM;
758
- }
759
- break;
760
- case 'r':
761
- // char (removed)
762
- if (buffer16_[0] == 'c' &&
763
- buffer16_[1] == 'h' && buffer16_[2] == 'a') {
764
- // token = Token::CHAR;
765
- }
766
- break;
767
- case 'o':
768
- // goto (removed)
769
- if (buffer16_[0] == 'g' &&
770
- buffer16_[1] == 'o' && buffer16_[2] == 't') {
771
- // token = Token::GOTO;
772
- }
773
- break;
774
- }
775
- break;
776
- case 5:
777
- // break final float catch super while
778
- // throw short class const false yield
779
- // number 3 character is most duplicated
780
- switch (buffer16_[3]) {
781
- case 'a':
782
- // break final float
783
- if (buffer16_[0] == 'b' && buffer16_[1] == 'r' &&
784
- buffer16_[2] == 'e' && buffer16_[4] == 'k') {
785
- // break
786
- token = Token::BREAK;
787
- } else if (buffer16_[0] == 'f') {
788
- if (buffer16_[1] == 'i' &&
789
- buffer16_[2] == 'n' && buffer16_[4] == 'l') {
790
- // final (removed)
791
- // token = Token::FINAL;
792
- } else if (buffer16_[1] == 'l' &&
793
- buffer16_[2] == 'o' && buffer16_[4] == 't') {
794
- // float (removed)
795
- // token = Token::FLOAT;
796
- }
797
- }
798
- break;
799
- case 'c':
800
- if (buffer16_[0] == 'c' && buffer16_[1] == 'a' &&
801
- buffer16_[2] == 't' && buffer16_[4] == 'h') {
802
- // catch
803
- token = Token::CATCH;
804
- }
805
- break;
806
- case 'e':
807
- if (buffer16_[0] == 's' && buffer16_[1] == 'u' &&
808
- buffer16_[2] == 'p' && buffer16_[4] == 'r') {
809
- // super
810
- token = Token::SUPER;
811
- }
812
- break;
813
- case 'l':
814
- if (buffer16_[0] == 'w' && buffer16_[1] == 'h' &&
815
- buffer16_[2] == 'i' && buffer16_[4] == 'e') {
816
- // while
817
- token = Token::WHILE;
818
- } else if (strict &&
819
- buffer16_[0] == 'y' && buffer16_[1] == 'i' &&
820
- buffer16_[2] == 'e' && buffer16_[4] == 'd') {
821
- // yield
822
- token = Token::YIELD;
823
- }
824
- break;
825
- case 'o':
826
- if (buffer16_[0] == 't' && buffer16_[1] == 'h' &&
827
- buffer16_[2] == 'r' && buffer16_[4] == 'w') {
828
- // throw
829
- token = Token::THROW;
830
- }
831
- break;
832
- case 'r':
833
- if (buffer16_[0] == 's' && buffer16_[1] == 'h' &&
834
- buffer16_[2] == 'o' && buffer16_[4] == 't') {
835
- // short (removed)
836
- // token = Token::SHORT;
837
- }
838
- break;
839
- case 's':
840
- // class const false
841
- if (buffer16_[0] == 'c') {
842
- if (buffer16_[1] == 'l' &&
843
- buffer16_[2] == 'a' && buffer16_[4] == 's') {
844
- // class
845
- token = Token::CLASS;
846
- } else if (buffer16_[1] == 'o' &&
847
- buffer16_[2] == 'n' && buffer16_[4] == 't') {
848
- // const
849
- token = Token::CONST;
850
- }
851
- } else if (buffer16_[0] == 'f' && buffer16_[1] == 'a' &&
852
- buffer16_[2] == 'l' && buffer16_[4] == 'e') {
853
- // false
854
- token = Token::FALSE_LITERAL;
855
- }
856
- break;
857
- }
858
- break;
859
- case 6:
860
- // double delete export import native
861
- // public return static switch typeof throws
862
- // number 0 character is most duplicated
863
- switch (buffer16_[0]) {
864
- case 'd':
865
- // double delete
866
- if (buffer16_[5] == 'e' &&
867
- buffer16_[4] == 'l' && buffer16_[3] == 'b' &&
868
- buffer16_[2] == 'u' && buffer16_[1] == 'o') {
869
- // double
870
- // token = Token::DOUBLE;
871
- } else if (buffer16_[5] == 'e' &&
872
- buffer16_[4] == 't' && buffer16_[3] == 'e' &&
873
- buffer16_[2] == 'l' && buffer16_[1] == 'e') {
874
- // delete
875
- token = Token::DELETE;
876
- }
877
- break;
878
- case 'e':
879
- // export
880
- token = IsMatch("export", len, Token::EXPORT);
881
- break;
882
- case 'i':
883
- // import
884
- token = IsMatch("import", len, Token::IMPORT);
885
- break;
886
- case 'n':
887
- // native (removed)
888
- // token = IsMatch("native", len, Token::NATIVE);
889
- break;
890
- case 'p':
891
- // public
892
- token = IsMatch("public", len, Token::PUBLIC, strict);
893
- break;
894
- case 'r':
895
- // return
896
- token = IsMatch("return", len, Token::RETURN);
897
- break;
898
- case 's':
899
- // switch static
900
- if (buffer16_[1] == 'w' &&
901
- buffer16_[2] == 'i' && buffer16_[3] == 't' &&
902
- buffer16_[4] == 'c' && buffer16_[5] == 'h') {
903
- // switch
904
- token = Token::SWITCH;
905
- } else if (strict &&
906
- buffer16_[1] == 't' &&
907
- buffer16_[2] == 'a' && buffer16_[3] == 't' &&
908
- buffer16_[4] == 'i' && buffer16_[5] == 'c') {
909
- // static
910
- token = Token::STATIC;
911
- }
912
- break;
913
- case 't':
914
- // typeof throws
915
- if (buffer16_[5] == 'f' &&
916
- buffer16_[4] == 'o' && buffer16_[3] == 'e' &&
917
- buffer16_[2] == 'p' && buffer16_[1] == 'y') {
918
- // typeof
919
- token = Token::TYPEOF;
920
- } else if (buffer16_[5] == 's' &&
921
- buffer16_[4] == 'w' && buffer16_[3] == 'o' &&
922
- buffer16_[2] == 'r' && buffer16_[1] == 'h') {
923
- // throws (removed)
924
- // token = Token::THROWS;
925
- }
926
- break;
927
- }
928
- break;
929
- case 7:
930
- // boolean default extends finally package private
931
- // number 0 character is most duplicated
932
- switch (buffer16_[0]) {
933
- case 'b':
934
- // boolean (removed)
935
- // token = IsMatch("boolean", len, Token::BOOLEAN);
936
- break;
937
- case 'd':
938
- token = IsMatch("default", len, Token::DEFAULT);
939
- break;
940
- case 'e':
941
- token = IsMatch("extends", len, Token::EXTENDS);
942
- break;
943
- case 'f':
944
- token = IsMatch("finally", len, Token::FINALLY);
945
- break;
946
- case 'p':
947
- if (buffer16_[1] == 'a') {
948
- token = IsMatch("package", len, Token::PACKAGE, strict);
949
- } else if (buffer16_[1] == 'r') {
950
- token = IsMatch("private", len, Token::PRIVATE, strict);
951
- }
952
- break;
953
- }
954
- break;
955
- case 8:
956
- // debugger continue abstract volatile function
957
- // number 4 character is most duplicated
958
- switch (buffer16_[4]) {
959
- case 'g':
960
- token = IsMatch("debugger", len, Token::DEBUGGER);
961
- break;
962
- case 'i':
963
- token = IsMatch("continue", len, Token::CONTINUE);
964
- break;
965
- case 'r':
966
- // abstract (removed)
967
- // token = IsMatch("abstract", len, Token::ABSTRACT);
968
- break;
969
- case 't':
970
- if (buffer16_[1] == 'o') {
971
- // token = IsMatch("volatile", len, Token::VOLATILE);
972
- } else if (buffer16_[1] == 'u') {
973
- token = IsMatch("function", len, Token::FUNCTION);
974
- }
975
- break;
976
- }
977
- break;
978
- case 9:
979
- // interface protected transient
980
- if (buffer16_[1] == 'n') {
981
- token = IsMatch("interface", len, Token::INTERFACE, strict);
982
- } else if (buffer16_[1] == 'r') {
983
- if (buffer16_[0] == 'p') {
984
- token = IsMatch("protected", len, Token::PROTECTED, strict);
985
- } else if (buffer16_[0] == 't') {
986
- // transient (removed)
987
- // token = IsMatch("transient", len, Token::TRANSIENT);
988
- }
989
- }
990
- break;
991
- case 10:
992
- // instanceof implements
993
- if (buffer16_[1] == 'n') {
994
- token = IsMatch("instanceof", len, Token::INSTANCEOF);
995
- } else if (buffer16_[1] == 'm') {
996
- token = IsMatch("implements", len, Token::IMPLEMENTS, strict);
997
- }
998
- break;
999
- case 12:
1000
- // synchronized (removed)
1001
- // token = IsMatch("synchronized", len, Token::SYNCHRONIZED);
1002
- token = Token::IDENTIFIER;
1003
- break;
1004
- }
1005
- return token;
1006
- }
1007
-
1008
- Token::Type DetectGetOrSet() const {
1009
- if (buffer16_.size() == 3) {
1010
- if (buffer16_[1] == 'e' && buffer16_[2] == 't') {
1011
- if (buffer16_[0] == 'g') {
1012
- return Token::GET;
1013
- } else if (buffer16_[0] == 's') {
1014
- return Token::SET;
1015
- }
1016
- }
1017
- }
1018
- return Token::IDENTIFIER;
611
+ return detail::Keyword<LexType>::Detect(buffer16_, strict);
1019
612
  }
1020
613
 
1021
614
  Token::Type ScanString() {
@@ -1031,7 +624,9 @@ class Lexer: private Noncopyable<Lexer>::type {
1031
624
  if (type_ == NONE) {
1032
625
  type_ = ESCAPE;
1033
626
  }
1034
- ScanEscape();
627
+ if (!ScanEscape()) {
628
+ return Token::ILLEGAL;
629
+ }
1035
630
  } else {
1036
631
  Record16Advance();
1037
632
  }
@@ -1045,10 +640,10 @@ class Lexer: private Noncopyable<Lexer>::type {
1045
640
  return Token::STRING;
1046
641
  }
1047
642
 
1048
- void ScanEscape() {
643
+ bool ScanEscape() {
1049
644
  if (Chars::IsLineTerminator(c_)) {
1050
645
  SkipLineTerminator();
1051
- return;
646
+ return true;
1052
647
  }
1053
648
  switch (c_) {
1054
649
  case '\'':
@@ -1076,18 +671,30 @@ class Lexer: private Noncopyable<Lexer>::type {
1076
671
  Record16('\t');
1077
672
  Advance();
1078
673
  break;
1079
- case 'u' :
674
+ case 'u' : {
1080
675
  Advance();
1081
- Record16(ScanHexEscape('u', 4));
676
+ bool ng = false;
677
+ const uc16 uc = ScanHexEscape('u', 4, &ng);
678
+ if (ng) {
679
+ return false;
680
+ }
681
+ Record16(uc);
1082
682
  break;
683
+ }
1083
684
  case 'v' :
1084
685
  Record16('\v');
1085
686
  Advance();
1086
687
  break;
1087
- case 'x' :
688
+ case 'x' : {
1088
689
  Advance();
1089
- Record16(ScanHexEscape('x', 2));
690
+ bool ng = false;
691
+ const uc16 uc = ScanHexEscape('x', 2, &ng);
692
+ if (ng) {
693
+ return false;
694
+ }
695
+ Record16(uc);
1090
696
  break;
697
+ }
1091
698
  case '0' :
1092
699
  case '1' :
1093
700
  case '2' :
@@ -1102,10 +709,16 @@ class Lexer: private Noncopyable<Lexer>::type {
1102
709
  Record16(ScanOctalEscape());
1103
710
  break;
1104
711
 
712
+ case '8' :
713
+ case '9' :
714
+ // section 7.8.4 and B1.2
715
+ return false;
716
+
1105
717
  default:
1106
718
  Record16Advance();
1107
719
  break;
1108
720
  }
721
+ return true;
1109
722
  }
1110
723
 
1111
724
  Token::Type ScanNumber(const bool period) {
@@ -1181,14 +794,13 @@ class Lexer: private Noncopyable<Lexer>::type {
1181
794
 
1182
795
  if (type == OCTAL) {
1183
796
  double val = 0;
1184
- for (std::vector<char>::const_iterator it = buffer8_.begin(),
797
+ for (std::string::const_iterator it = buffer8_.begin(),
1185
798
  last = buffer8_.end(); it != last; ++it) {
1186
799
  val = val * 8 + (*it - '0');
1187
800
  }
1188
801
  numeric_ = val;
1189
802
  } else {
1190
- Record8('\0'); // Null Terminated String
1191
- numeric_ = std::strtod(buffer8_.data(), NULL);
803
+ numeric_ = std::strtod(buffer8_.c_str(), NULL);
1192
804
  }
1193
805
  type_ = type;
1194
806
  return Token::NUMBER;
@@ -1211,7 +823,7 @@ class Lexer: private Noncopyable<Lexer>::type {
1211
823
  return res;
1212
824
  }
1213
825
 
1214
- uc16 ScanHexEscape(uc16 c, int len) {
826
+ uc16 ScanHexEscape(uc16 c, int len, bool* ng) {
1215
827
  uc16 res = 0;
1216
828
  for (int i = 0; i < len; ++i) {
1217
829
  const int d = HexValue(c_);
@@ -1219,6 +831,7 @@ class Lexer: private Noncopyable<Lexer>::type {
1219
831
  for (int j = i - 1; j >= 0; --j) {
1220
832
  PushBack();
1221
833
  }
834
+ *ng = true;
1222
835
  return c;
1223
836
  }
1224
837
  res = res * 16 + d;
@@ -1262,8 +875,8 @@ class Lexer: private Noncopyable<Lexer>::type {
1262
875
  ++line_number_;
1263
876
  }
1264
877
 
1265
- BasicSource* source_;
1266
- std::vector<char> buffer8_;
878
+ const Source* source_;
879
+ std::string buffer8_;
1267
880
  std::vector<uc16> buffer16_;
1268
881
  double numeric_;
1269
882
  State type_;