iv-phonic 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +24 -0
- data/Manifest.txt +49 -0
- data/README.rdoc +32 -0
- data/Rakefile +54 -0
- data/ext/include/iv/algorithm.h +23 -0
- data/ext/include/iv/alloc.h +200 -0
- data/ext/include/iv/any.h +71 -0
- data/ext/include/iv/ast-factory.h +277 -0
- data/ext/include/iv/ast-fwd.h +92 -0
- data/ext/include/iv/ast-serializer.h +579 -0
- data/ext/include/iv/ast-visitor.h +121 -0
- data/ext/include/iv/ast.h +1127 -0
- data/ext/include/iv/chars.h +83 -0
- data/ext/include/iv/cmdline.h +830 -0
- data/ext/include/iv/conversions.h +308 -0
- data/ext/include/iv/dtoa.h +20 -0
- data/ext/include/iv/enable_if.h +18 -0
- data/ext/include/iv/errors.h +15 -0
- data/ext/include/iv/fixedcontainer.h +42 -0
- data/ext/include/iv/functor.h +29 -0
- data/ext/include/iv/lexer.h +1281 -0
- data/ext/include/iv/location.h +23 -0
- data/ext/include/iv/mt19937.h +175 -0
- data/ext/include/iv/noncopyable.h +30 -0
- data/ext/include/iv/none.h +10 -0
- data/ext/include/iv/parser.h +2150 -0
- data/ext/include/iv/source.h +27 -0
- data/ext/include/iv/space.h +178 -0
- data/ext/include/iv/static_assert.h +30 -0
- data/ext/include/iv/stringpiece.h +385 -0
- data/ext/include/iv/token.h +311 -0
- data/ext/include/iv/ucdata.h +58 -0
- data/ext/include/iv/uchar.h +8 -0
- data/ext/include/iv/ustring.h +28 -0
- data/ext/include/iv/ustringpiece.h +9 -0
- data/ext/include/iv/utils.h +83 -0
- data/ext/include/iv/xorshift.h +74 -0
- data/ext/iv/phonic/ast-fwd.h +21 -0
- data/ext/iv/phonic/ast.h +10 -0
- data/ext/iv/phonic/creator.h +530 -0
- data/ext/iv/phonic/encoding.h +110 -0
- data/ext/iv/phonic/extconf.rb +5 -0
- data/ext/iv/phonic/factory.h +247 -0
- data/ext/iv/phonic/parser.h +12 -0
- data/ext/iv/phonic/phonic.cc +69 -0
- data/ext/iv/phonic/rnode.h +15 -0
- data/ext/iv/phonic/rparser.h +48 -0
- data/ext/iv/phonic/source.h +146 -0
- data/test/test_iv_phonic.rb +32 -0
- metadata +159 -0
@@ -0,0 +1,1281 @@
|
|
1
|
+
#ifndef _IV_LEXER_H_
|
2
|
+
#define _IV_LEXER_H_
|
3
|
+
|
4
|
+
#include <cstddef>
|
5
|
+
#include <cassert>
|
6
|
+
#include <cstdlib>
|
7
|
+
#include <vector>
|
8
|
+
#include <string>
|
9
|
+
#include "uchar.h"
|
10
|
+
#include "chars.h"
|
11
|
+
#include "token.h"
|
12
|
+
#include "source.h"
|
13
|
+
#include "location.h"
|
14
|
+
#include "noncopyable.h"
|
15
|
+
|
16
|
+
namespace iv {
|
17
|
+
namespace core {
|
18
|
+
|
19
|
+
class Lexer: private Noncopyable<Lexer>::type {
|
20
|
+
public:
|
21
|
+
// Bit flags accepted by Next(); they choose how a scanned identifier is
// classified (keyword lookup, getter/setter lookup, strict-mode words).
enum LexType {
  kClear = 0,
  kIdentifyReservedWords = 1 << 0,
  kIgnoreReservedWords = 1 << 1,
  kIgnoreReservedWordsAndIdentifyGetterOrSetter = 1 << 2,
  kStrict = 1 << 3
};
|
28
|
+
// Kind of the most recently scanned literal: strings report NONE, ESCAPE or
// OCTAL; numbers report DECIMAL, HEX or OCTAL.
enum State {
  NONE = 0,
  ESCAPE = 1,
  DECIMAL = 2,
  HEX = 3,
  OCTAL = 4
};
|
35
|
+
|
36
|
+
// Creates a lexer reading from |src| and loads the first character into c_
// so that Next() can be called immediately.
explicit Lexer(BasicSource* src)
  : source_(src),
    buffer8_(kInitialReadBufferCapacity),
    buffer16_(kInitialReadBufferCapacity),
    pos_(0),
    end_(source_->size()),
    has_line_terminator_before_next_(false),
    has_shebang_(false),
    line_number_(1),
    location_() {
  // Prime the one-character lookahead.
  Advance();
}
|
48
|
+
|
49
|
+
// Scans and returns the next token.
//
// |type| is a bitmask of LexType values; it is only consulted when an
// identifier is scanned (it is forwarded to ScanIdentifier).
//
// Comments and line terminators produce Token::NOT_FOUND internally, which
// makes the loop scan again; crossing a line terminator sets
// has_line_terminator_before_next_. location_ is updated with the begin/end
// positions of the scan.
//
// '/' and '/=' are always reported as DIV / ASSIGN_DIV; deciding whether they
// start a RegExp literal is left to the caller (see ScanRegExpLiteral).
Token::Type Next(int type) {
  Token::Type token;
  has_line_terminator_before_next_ = false;
  do {
    location_.begin_position_ = pos();
    while (Chars::IsWhiteSpace(c_)) {
      // white space
      Advance();
    }
    switch (c_) {
      case '"':
      case '\'':
        // string literal
        token = ScanString();
        break;

      case '<':
        // < <= << <<= <!--
        Advance();
        if (c_ == '=') {
          Advance();
          token = Token::LTE;
        } else if (c_ == '<') {
          Advance();
          if (c_ == '=') {
            Advance();
            token = Token::ASSIGN_SHL;
          } else {
            token = Token::SHL;
          }
        } else if (c_ == '!') {
          token = ScanHtmlComment();
        } else {
          token = Token::LT;
        }
        break;

      case '>':
        // > >= >> >>= >>> >>>=
        Advance();
        if (c_ == '=') {
          Advance();
          token = Token::GTE;
        } else if (c_ == '>') {
          Advance();
          if (c_ == '=') {
            Advance();
            token = Token::ASSIGN_SAR;
          } else if (c_ == '>') {
            Advance();
            if (c_ == '=') {
              Advance();
              token = Token::ASSIGN_SHR;
            } else {
              token = Token::SHR;
            }
          } else {
            token = Token::SAR;
          }
        } else {
          token = Token::GT;
        }
        break;

      case '=':
        // = == ===
        Advance();
        if (c_ == '=') {
          Advance();
          if (c_ == '=') {
            Advance();
            token = Token::EQ_STRICT;
          } else {
            token = Token::EQ;
          }
        } else {
          token = Token::ASSIGN;
        }
        break;

      case '!':
        // ! != !==
        Advance();
        if (c_ == '=') {
          Advance();
          if (c_ == '=') {
            Advance();
            token = Token::NE_STRICT;
          } else {
            token = Token::NE;
          }
        } else {
          token = Token::NOT;
        }
        break;

      case '+':
        // + ++ +=
        Advance();
        if (c_ == '+') {
          Advance();
          token = Token::INC;
        } else if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_ADD;
        } else {
          token = Token::ADD;
        }
        break;

      case '-':
        // - -- --> -=
        Advance();
        if (c_ == '-') {
          Advance();
          // "-->" only opens a comment when it starts a line.
          if (c_ == '>' && has_line_terminator_before_next_) {
            token = SkipSingleLineComment();
          } else {
            token = Token::DEC;
          }
        } else if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_SUB;
        } else {
          token = Token::SUB;
        }
        break;

      case '*':
        // * *=
        Advance();
        if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_MUL;
        } else {
          token = Token::MUL;
        }
        break;

      case '%':
        // % %=
        Advance();
        if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_MOD;
        } else {
          token = Token::MOD;
        }
        break;

      case '/':
        // / // /* /=
        // ASSIGN_DIV and DIV may still turn out to be the start of a RegExp
        // literal; that is resolved by the caller.
        Advance();
        if (c_ == '/') {
          // SINGLE LINE COMMENT
          // A magic comment sits on the first line, or on the second line
          // when the first one is a shebang. (The line numbers used to be
          // swapped; both branches currently skip the line the same way.)
          if (line_number_ == (has_shebang_ ? 2 : 1)) {
            token = ScanMagicComment();
          } else {
            token = SkipSingleLineComment();
          }
        } else if (c_ == '*') {
          // MULTI LINES COMMENT
          token = SkipMultiLineComment();
        } else if (c_ == '=') {
          // ASSIGN_DIV
          Advance();
          token = Token::ASSIGN_DIV;
        } else {
          // DIV
          token = Token::DIV;
        }
        break;

      case '&':
        // && &= &
        Advance();
        if (c_ == '&') {
          Advance();
          token = Token::LOGICAL_AND;
        } else if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_BIT_AND;
        } else {
          token = Token::BIT_AND;
        }
        break;

      case '|':
        // || |= |
        Advance();
        if (c_ == '|') {
          Advance();
          token = Token::LOGICAL_OR;
        } else if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_BIT_OR;
        } else {
          token = Token::BIT_OR;
        }
        break;

      case '^':
        // ^ ^=
        // "^=" used to be split into BIT_XOR followed by ASSIGN.
        Advance();
        if (c_ == '=') {
          Advance();
          token = Token::ASSIGN_BIT_XOR;
        } else {
          token = Token::BIT_XOR;
        }
        break;

      case '.':
        // . Number
        Advance();
        if (Chars::IsDecimalDigit(c_)) {
          // float number parse
          token = ScanNumber(true);
        } else {
          token = Token::PERIOD;
        }
        break;

      case ':':
        Advance();
        token = Token::COLON;
        break;

      case ';':
        Advance();
        token = Token::SEMICOLON;
        break;

      case ',':
        Advance();
        token = Token::COMMA;
        break;

      case '(':
        Advance();
        token = Token::LPAREN;
        break;

      case ')':
        Advance();
        token = Token::RPAREN;
        break;

      case '[':
        Advance();
        token = Token::LBRACK;
        break;

      case ']':
        Advance();
        token = Token::RBRACK;
        break;

      case '{':
        Advance();
        token = Token::LBRACE;
        break;

      case '}':
        Advance();
        token = Token::RBRACE;
        break;

      case '?':
        Advance();
        token = Token::CONDITIONAL;
        break;

      case '~':
        Advance();
        token = Token::BIT_NOT;
        break;

      case '#':
        // #!
        // A shebang is only honoured as the very first characters of the
        // source; it is skipped like a single line comment.
        if (pos_ == 1) {
          assert(line_number_ == 1);
          Advance();
          if (c_ == '!') {
            // shebang
            has_shebang_ = true;
            token = SkipSingleLineComment();
            break;
          }
          PushBack();
        }
        // fall through: any other '#' is handled by the default case

      default:
        if (Chars::IsIdentifierStart(c_)) {
          token = ScanIdentifier(type);
        } else if (Chars::IsDecimalDigit(c_)) {
          token = ScanNumber(false);
        } else if (Chars::IsLineTerminator(c_)) {
          SkipLineTerminator();
          has_line_terminator_before_next_ = true;
          token = Token::NOT_FOUND;
        } else if (c_ < 0) {
          // EOS
          token = Token::EOS;
        } else {
          token = Token::ILLEGAL;
        }
        break;
    }
  } while (token == Token::NOT_FOUND);
  location_.end_position_ = pos();
  return token;
}
|
360
|
+
|
361
|
+
inline const std::vector<uc16>& Buffer() const {
|
362
|
+
return buffer16_;
|
363
|
+
}
|
364
|
+
|
365
|
+
inline const std::vector<char>& Buffer8() const {
|
366
|
+
return buffer8_;
|
367
|
+
}
|
368
|
+
|
369
|
+
inline const double& Numeric() const {
|
370
|
+
return numeric_;
|
371
|
+
}
|
372
|
+
|
373
|
+
inline State NumericType() const {
|
374
|
+
assert(type_ == DECIMAL ||
|
375
|
+
type_ == HEX ||
|
376
|
+
type_ == OCTAL);
|
377
|
+
return type_;
|
378
|
+
}
|
379
|
+
|
380
|
+
inline State StringEscapeType() const {
|
381
|
+
assert(type_ == NONE ||
|
382
|
+
type_ == ESCAPE ||
|
383
|
+
type_ == OCTAL);
|
384
|
+
return type_;
|
385
|
+
}
|
386
|
+
|
387
|
+
inline bool has_line_terminator_before_next() const {
|
388
|
+
return has_line_terminator_before_next_;
|
389
|
+
}
|
390
|
+
|
391
|
+
std::size_t line_number() const {
|
392
|
+
return line_number_;
|
393
|
+
}
|
394
|
+
|
395
|
+
const std::string& filename() const {
|
396
|
+
return source_->filename();
|
397
|
+
}
|
398
|
+
|
399
|
+
std::size_t pos() const {
|
400
|
+
return pos_;
|
401
|
+
}
|
402
|
+
|
403
|
+
inline BasicSource* source() const {
|
404
|
+
return source_;
|
405
|
+
}
|
406
|
+
|
407
|
+
inline Location location() const {
|
408
|
+
return location_;
|
409
|
+
}
|
410
|
+
|
411
|
+
bool ScanRegExpLiteral(bool contains_eq) {
|
412
|
+
bool character = false;
|
413
|
+
buffer16_.clear();
|
414
|
+
if (contains_eq) {
|
415
|
+
Record16('=');
|
416
|
+
}
|
417
|
+
while (c_ != '/' || character) {
|
418
|
+
// invalid RegExp pattern
|
419
|
+
if (Chars::IsLineTerminator(c_) || c_ < 0) {
|
420
|
+
return false;
|
421
|
+
}
|
422
|
+
if (c_ == '\\') {
|
423
|
+
// escape
|
424
|
+
Record16Advance();
|
425
|
+
if (Chars::IsLineTerminator(c_) || c_ < 0) {
|
426
|
+
return false;
|
427
|
+
}
|
428
|
+
Record16Advance();
|
429
|
+
} else {
|
430
|
+
if (c_ == '[') {
|
431
|
+
character = true;
|
432
|
+
} else if (c_ == ']') {
|
433
|
+
character = false;
|
434
|
+
}
|
435
|
+
Record16Advance();
|
436
|
+
}
|
437
|
+
}
|
438
|
+
Advance();
|
439
|
+
return true;
|
440
|
+
}
|
441
|
+
|
442
|
+
bool ScanRegExpFlags() {
|
443
|
+
buffer16_.clear();
|
444
|
+
uc16 uc;
|
445
|
+
while (Chars::IsIdentifierPart(c_)) {
|
446
|
+
if (c_ == '\\') {
|
447
|
+
Advance();
|
448
|
+
if (c_ != 'u') {
|
449
|
+
return false;
|
450
|
+
}
|
451
|
+
Advance();
|
452
|
+
uc = ScanHexEscape('u', 4);
|
453
|
+
if (uc == '\\') {
|
454
|
+
return false;
|
455
|
+
}
|
456
|
+
Record16(uc);
|
457
|
+
} else {
|
458
|
+
Record16Advance();
|
459
|
+
}
|
460
|
+
}
|
461
|
+
return true;
|
462
|
+
}
|
463
|
+
|
464
|
+
private:
|
465
|
+
static const std::size_t kInitialReadBufferCapacity = 32;
|
466
|
+
|
467
|
+
void Initialize() {
|
468
|
+
Advance();
|
469
|
+
}
|
470
|
+
|
471
|
+
inline void Advance() {
|
472
|
+
if (pos_ == end_) {
|
473
|
+
c_ = -1;
|
474
|
+
} else {
|
475
|
+
c_ = source_->Get(pos_++);
|
476
|
+
}
|
477
|
+
}
|
478
|
+
inline void Record8() {
|
479
|
+
buffer8_.push_back(static_cast<char>(c_));
|
480
|
+
}
|
481
|
+
inline void Record8(const int ch) {
|
482
|
+
buffer8_.push_back(static_cast<char>(ch));
|
483
|
+
}
|
484
|
+
inline void Record16() { buffer16_.push_back(c_); }
|
485
|
+
inline void Record16(const int ch) { buffer16_.push_back(ch); }
|
486
|
+
inline void Record8Advance() {
|
487
|
+
Record8();
|
488
|
+
Advance();
|
489
|
+
}
|
490
|
+
inline void Record16Advance() {
|
491
|
+
Record16();
|
492
|
+
Advance();
|
493
|
+
}
|
494
|
+
|
495
|
+
// Undoes the most recent Advance(): c_ becomes the previous character again.
//
// Advance() does not move pos_ once the end of the source is reached (it
// only sets c_ to -1), so that case must be undone without touching pos_.
// Rewinding unconditionally stepped back one character too far at the end of
// the input: "<!" as the last two characters was re-lexed as '<' forever,
// and a lone "#" was reported as EOS.
void PushBack() {
  if (c_ < 0) {
    // End of source: pos_ still points just past the previous character.
    if (pos_ > 0) {
      c_ = source_->Get(pos_ - 1);
    }
    return;
  }
  if (pos_ < 2) {
    // Nothing precedes the current character.
    c_ = -1;
  } else {
    // pos_ - 1 is the current character, pos_ - 2 the one before it.
    c_ = source_->Get(pos_ - 2);
    --pos_;
  }
}
|
503
|
+
|
504
|
+
// Strict-mode-only variant of IsMatch: outside strict mode the word is an
// ordinary identifier; in strict mode the first |len| buffered characters
// are compared against |keyword| and |guess| is returned on a match.
inline Token::Type IsMatch(char const * keyword,
                           std::size_t len,
                           Token::Type guess, bool strict) const {
  return strict ? IsMatch(keyword, len, guess) : Token::IDENTIFIER;
}
|
518
|
+
|
519
|
+
// Compares the first |len| characters of buffer16_ with |keyword| and returns
// |guess| when they are all equal, Token::IDENTIFIER otherwise.
//
// A zero |len|, a |len| larger than the buffer, or a |keyword| shorter than
// |len| is treated as a mismatch. The previous do/while form underflowed
// |len| when it was zero and could read past the end of the buffer or the
// keyword.
inline Token::Type IsMatch(char const * keyword,
                           std::size_t len,
                           Token::Type guess) const {
  if (len == 0 || len > buffer16_.size()) {
    return Token::IDENTIFIER;
  }
  for (std::size_t i = 0; i < len; ++i) {
    if (keyword[i] == '\0' || buffer16_[i] != keyword[i]) {
      return Token::IDENTIFIER;
    }
  }
  return guess;
}
|
530
|
+
|
531
|
+
// Skips the rest of the current line. The terminating line terminator is
// left in c_ for the caller. Always returns Token::NOT_FOUND.
// see ECMA-262 section 7.4
Token::Type SkipSingleLineComment() {
  do {
    Advance();
  } while (c_ >= 0 && !Chars::IsLineTerminator(c_));
  return Token::NOT_FOUND;
}
|
539
|
+
|
540
|
+
// Skips a /* ... */ comment; on entry c_ is the '*' of the opening "/*".
// Returns Token::NOT_FOUND once the comment is closed, or Token::ILLEGAL
// when the source ends first.
//
// Every line terminator inside the comment goes through SkipLineTerminator()
// and sets has_line_terminator_before_next_ (see ECMA-262 section 7.4).
// Previously a terminator directly after "/*", or directly after another
// terminator, was consumed as an ordinary character and skipped both steps.
Token::Type SkipMultiLineComment() {
  Advance();
  while (c_ >= 0) {
    if (Chars::IsLineTerminator(c_)) {
      SkipLineTerminator();
      has_line_terminator_before_next_ = true;
      continue;
    }
    // remember previous ch
    const int prev = c_;
    Advance();
    if (prev == '*' && c_ == '/') {
      // Replace the closing '/' with a blank so the next scan skips it as
      // white space.
      c_ = ' ';
      return Token::NOT_FOUND;
    }
  }
  return Token::ILLEGAL;
}
|
559
|
+
|
560
|
+
// Called with c_ on the '!' of "<!". When the text continues with "--" the
// rest of the line is skipped as a comment; otherwise the lookahead is pushed
// back and the '<' is reported as Token::LT.
Token::Type ScanHtmlComment() {
  Advance();
  if (c_ != '-') {
    // <! is LT and NOT
    PushBack();
    return Token::LT;
  }
  // <!-
  Advance();
  if (c_ != '-') {
    // Undo both the '-' and the '!' lookahead.
    PushBack();
    PushBack();
    return Token::LT;
  }
  // <!--
  return SkipSingleLineComment();
}
|
575
|
+
|
576
|
+
// Handles a magic comment. For now the comment text is not interpreted: the
// line is skipped exactly like any other single line comment.
// see ECMA-262 section 7.4
Token::Type ScanMagicComment() {
  return SkipSingleLineComment();
}
|
584
|
+
|
585
|
+
// Scans an identifier into buffer16_ and classifies it.
//
// \uXXXX escapes are accepted anywhere in the name; the decoded character
// must be a valid identifier start (first position) or identifier part
// (later positions), otherwise Token::ILLEGAL is returned.
//
// |type| is a LexType bitmask: with kIdentifyReservedWords the name is looked
// up as a keyword (kStrict enables the strict-mode-only words); with
// kIgnoreReservedWordsAndIdentifyGetterOrSetter only "get"/"set" are
// recognised; otherwise the result is Token::IDENTIFIER.
Token::Type ScanIdentifier(int type) {
  buffer16_.clear();

  bool at_head = true;
  do {
    if (c_ == '\\') {
      Advance();
      if (c_ != 'u') {
        return Token::ILLEGAL;
      }
      Advance();
      const uc16 decoded = ScanHexEscape('u', 4);
      if (decoded == '\\') {
        return Token::ILLEGAL;
      }
      const bool acceptable = at_head ? Chars::IsIdentifierStart(decoded)
                                      : Chars::IsIdentifierPart(decoded);
      if (!acceptable) {
        return Token::ILLEGAL;
      }
      Record16(decoded);
    } else {
      Record16Advance();
    }
    at_head = false;
  } while (Chars::IsIdentifierPart(c_));

  if (type & kIdentifyReservedWords) {
    return DetectKeyword(type & kStrict);
  }
  if (type & kIgnoreReservedWordsAndIdentifyGetterOrSetter) {
    return DetectGetOrSet();
  }
  return Token::IDENTIFIER;
}
|
631
|
+
|
632
|
+
// detect which Identifier is Keyword, FutureReservedWord or not
|
633
|
+
// Keyword and FutureReservedWord are defined in ECMA-262 5th.
|
634
|
+
//
|
635
|
+
// Some words such as :
|
636
|
+
// int, short, boolean, byte, long, char, float, double, abstract, volatile,
|
637
|
+
// transient, final, throws, goto, native, synchronized
|
638
|
+
// were defined as FutureReservedWord in ECMA-262 3rd, but not in 5th.
|
639
|
+
// So, DetectKeyword interprets them as Identifier.
|
640
|
+
Token::Type DetectKeyword(bool strict) const {
|
641
|
+
const std::size_t len = buffer16_.size();
|
642
|
+
Token::Type token = Token::IDENTIFIER;
|
643
|
+
switch (len) {
|
644
|
+
case 2:
|
645
|
+
// if in do
|
646
|
+
if (buffer16_[0] == 'i') {
|
647
|
+
if (buffer16_[1] == 'f') {
|
648
|
+
token = Token::IF;
|
649
|
+
} else if (buffer16_[1] == 'n') {
|
650
|
+
token = Token::IN;
|
651
|
+
}
|
652
|
+
} else if (buffer16_[0] == 'd' && buffer16_[1] == 'o') {
|
653
|
+
// do
|
654
|
+
token = Token::DO;
|
655
|
+
}
|
656
|
+
break;
|
657
|
+
case 3:
|
658
|
+
// for var int new try let
|
659
|
+
switch (buffer16_[2]) {
|
660
|
+
case 't':
|
661
|
+
if (buffer16_[0] == 'l' && buffer16_[1] == 'e' && strict) {
|
662
|
+
// let
|
663
|
+
token = Token::LET;
|
664
|
+
} else if (buffer16_[0] == 'i' && buffer16_[1] == 'n') {
|
665
|
+
// int (removed)
|
666
|
+
// token = Token::INT;
|
667
|
+
}
|
668
|
+
break;
|
669
|
+
case 'r':
|
670
|
+
// for var
|
671
|
+
if (buffer16_[0] == 'f' && buffer16_[1] == 'o') {
|
672
|
+
// for
|
673
|
+
token = Token::FOR;
|
674
|
+
} else if (buffer16_[0] == 'v' && buffer16_[1] == 'a') {
|
675
|
+
// var
|
676
|
+
token = Token::VAR;
|
677
|
+
}
|
678
|
+
break;
|
679
|
+
case 'y':
|
680
|
+
// try
|
681
|
+
if (buffer16_[0] == 't' && buffer16_[1] == 'r') {
|
682
|
+
token = Token::TRY;
|
683
|
+
}
|
684
|
+
break;
|
685
|
+
case 'w':
|
686
|
+
// new
|
687
|
+
if (buffer16_[0] == 'n' && buffer16_[1] == 'e') {
|
688
|
+
token = Token::NEW;
|
689
|
+
}
|
690
|
+
break;
|
691
|
+
}
|
692
|
+
break;
|
693
|
+
case 4:
|
694
|
+
// else case true byte null this
|
695
|
+
// void with long enum char goto
|
696
|
+
// number 3 character is most duplicated
|
697
|
+
switch (buffer16_[3]) {
|
698
|
+
case 'e':
|
699
|
+
// else case true byte
|
700
|
+
if (buffer16_[2] == 's') {
|
701
|
+
if (buffer16_[0] == 'e' && buffer16_[1] == 'l') {
|
702
|
+
// else
|
703
|
+
token = Token::ELSE;
|
704
|
+
} else if (buffer16_[0] == 'c' && buffer16_[1] == 'a') {
|
705
|
+
// case
|
706
|
+
token = Token::CASE;
|
707
|
+
}
|
708
|
+
} else if (buffer16_[0] == 't' &&
|
709
|
+
buffer16_[1] == 'r' && buffer16_[2] == 'u') {
|
710
|
+
// true
|
711
|
+
token = Token::TRUE_LITERAL;
|
712
|
+
} else if (buffer16_[0] == 'b' &&
|
713
|
+
buffer16_[1] == 'y' && buffer16_[2] == 't') {
|
714
|
+
// byte (removed)
|
715
|
+
// token = Token::BYTE;
|
716
|
+
}
|
717
|
+
break;
|
718
|
+
case 'l':
|
719
|
+
// null
|
720
|
+
if (buffer16_[0] == 'n' &&
|
721
|
+
buffer16_[1] == 'u' && buffer16_[2] == 'l') {
|
722
|
+
token = Token::NULL_LITERAL;
|
723
|
+
}
|
724
|
+
break;
|
725
|
+
case 's':
|
726
|
+
// this
|
727
|
+
if (buffer16_[0] == 't' &&
|
728
|
+
buffer16_[1] == 'h' && buffer16_[2] == 'i') {
|
729
|
+
token = Token::THIS;
|
730
|
+
}
|
731
|
+
break;
|
732
|
+
case 'd':
|
733
|
+
// void
|
734
|
+
if (buffer16_[0] == 'v' &&
|
735
|
+
buffer16_[1] == 'o' && buffer16_[2] == 'i') {
|
736
|
+
token = Token::VOID;
|
737
|
+
}
|
738
|
+
break;
|
739
|
+
case 'h':
|
740
|
+
// with
|
741
|
+
if (buffer16_[0] == 'w' &&
|
742
|
+
buffer16_[1] == 'i' && buffer16_[2] == 't') {
|
743
|
+
token = Token::WITH;
|
744
|
+
}
|
745
|
+
break;
|
746
|
+
case 'g':
|
747
|
+
// long (removed)
|
748
|
+
if (buffer16_[0] == 'l' &&
|
749
|
+
buffer16_[1] == 'o' && buffer16_[2] == 'n') {
|
750
|
+
// token = Token::LONG;
|
751
|
+
}
|
752
|
+
break;
|
753
|
+
case 'm':
|
754
|
+
// enum
|
755
|
+
if (buffer16_[0] == 'e' &&
|
756
|
+
buffer16_[1] == 'n' && buffer16_[2] == 'u') {
|
757
|
+
token = Token::ENUM;
|
758
|
+
}
|
759
|
+
break;
|
760
|
+
case 'r':
|
761
|
+
// char (removed)
|
762
|
+
if (buffer16_[0] == 'c' &&
|
763
|
+
buffer16_[1] == 'h' && buffer16_[2] == 'a') {
|
764
|
+
// token = Token::CHAR;
|
765
|
+
}
|
766
|
+
break;
|
767
|
+
case 'o':
|
768
|
+
// goto (removed)
|
769
|
+
if (buffer16_[0] == 'g' &&
|
770
|
+
buffer16_[1] == 'o' && buffer16_[2] == 't') {
|
771
|
+
// token = Token::GOTO;
|
772
|
+
}
|
773
|
+
break;
|
774
|
+
}
|
775
|
+
break;
|
776
|
+
case 5:
|
777
|
+
// break final float catch super while
|
778
|
+
// throw short class const false yield
|
779
|
+
// number 3 character is most duplicated
|
780
|
+
switch (buffer16_[3]) {
|
781
|
+
case 'a':
|
782
|
+
// break final float
|
783
|
+
if (buffer16_[0] == 'b' && buffer16_[1] == 'r' &&
|
784
|
+
buffer16_[2] == 'e' && buffer16_[4] == 'k') {
|
785
|
+
// break
|
786
|
+
token = Token::BREAK;
|
787
|
+
} else if (buffer16_[0] == 'f') {
|
788
|
+
if (buffer16_[1] == 'i' &&
|
789
|
+
buffer16_[2] == 'n' && buffer16_[4] == 'l') {
|
790
|
+
// final (removed)
|
791
|
+
// token = Token::FINAL;
|
792
|
+
} else if (buffer16_[1] == 'l' &&
|
793
|
+
buffer16_[2] == 'o' && buffer16_[4] == 't') {
|
794
|
+
// float (removed)
|
795
|
+
// token = Token::FLOAT;
|
796
|
+
}
|
797
|
+
}
|
798
|
+
break;
|
799
|
+
case 'c':
|
800
|
+
if (buffer16_[0] == 'c' && buffer16_[1] == 'a' &&
|
801
|
+
buffer16_[2] == 't' && buffer16_[4] == 'h') {
|
802
|
+
// catch
|
803
|
+
token = Token::CATCH;
|
804
|
+
}
|
805
|
+
break;
|
806
|
+
case 'e':
|
807
|
+
if (buffer16_[0] == 's' && buffer16_[1] == 'u' &&
|
808
|
+
buffer16_[2] == 'p' && buffer16_[4] == 'r') {
|
809
|
+
// super
|
810
|
+
token = Token::SUPER;
|
811
|
+
}
|
812
|
+
break;
|
813
|
+
case 'l':
|
814
|
+
if (buffer16_[0] == 'w' && buffer16_[1] == 'h' &&
|
815
|
+
buffer16_[2] == 'i' && buffer16_[4] == 'e') {
|
816
|
+
// while
|
817
|
+
token = Token::WHILE;
|
818
|
+
} else if (strict &&
|
819
|
+
buffer16_[0] == 'y' && buffer16_[1] == 'i' &&
|
820
|
+
buffer16_[2] == 'e' && buffer16_[4] == 'd') {
|
821
|
+
// yield
|
822
|
+
token = Token::YIELD;
|
823
|
+
}
|
824
|
+
break;
|
825
|
+
case 'o':
|
826
|
+
if (buffer16_[0] == 't' && buffer16_[1] == 'h' &&
|
827
|
+
buffer16_[2] == 'r' && buffer16_[4] == 'w') {
|
828
|
+
// throw
|
829
|
+
token = Token::THROW;
|
830
|
+
}
|
831
|
+
break;
|
832
|
+
case 'r':
|
833
|
+
if (buffer16_[0] == 's' && buffer16_[1] == 'h' &&
|
834
|
+
buffer16_[2] == 'o' && buffer16_[4] == 't') {
|
835
|
+
// short (removed)
|
836
|
+
// token = Token::SHORT;
|
837
|
+
}
|
838
|
+
break;
|
839
|
+
case 's':
|
840
|
+
// class const false
|
841
|
+
if (buffer16_[0] == 'c') {
|
842
|
+
if (buffer16_[1] == 'l' &&
|
843
|
+
buffer16_[2] == 'a' && buffer16_[4] == 's') {
|
844
|
+
// class
|
845
|
+
token = Token::CLASS;
|
846
|
+
} else if (buffer16_[1] == 'o' &&
|
847
|
+
buffer16_[2] == 'n' && buffer16_[4] == 't') {
|
848
|
+
// const
|
849
|
+
token = Token::CONST;
|
850
|
+
}
|
851
|
+
} else if (buffer16_[0] == 'f' && buffer16_[1] == 'a' &&
|
852
|
+
buffer16_[2] == 'l' && buffer16_[4] == 'e') {
|
853
|
+
// false
|
854
|
+
token = Token::FALSE_LITERAL;
|
855
|
+
}
|
856
|
+
break;
|
857
|
+
}
|
858
|
+
break;
|
859
|
+
case 6:
|
860
|
+
// double delete export import native
|
861
|
+
// public return static switch typeof throws
|
862
|
+
// number 0 character is most duplicated
|
863
|
+
switch (buffer16_[0]) {
|
864
|
+
case 'd':
|
865
|
+
// double delete
|
866
|
+
if (buffer16_[5] == 'e' &&
|
867
|
+
buffer16_[4] == 'l' && buffer16_[3] == 'b' &&
|
868
|
+
buffer16_[2] == 'u' && buffer16_[1] == 'o') {
|
869
|
+
// double
|
870
|
+
// token = Token::DOUBLE;
|
871
|
+
} else if (buffer16_[5] == 'e' &&
|
872
|
+
buffer16_[4] == 't' && buffer16_[3] == 'e' &&
|
873
|
+
buffer16_[2] == 'l' && buffer16_[1] == 'e') {
|
874
|
+
// delete
|
875
|
+
token = Token::DELETE;
|
876
|
+
}
|
877
|
+
break;
|
878
|
+
case 'e':
|
879
|
+
// export
|
880
|
+
token = IsMatch("export", len, Token::EXPORT);
|
881
|
+
break;
|
882
|
+
case 'i':
|
883
|
+
// import
|
884
|
+
token = IsMatch("import", len, Token::IMPORT);
|
885
|
+
break;
|
886
|
+
case 'n':
|
887
|
+
// native (removed)
|
888
|
+
// token = IsMatch("native", len, Token::NATIVE);
|
889
|
+
break;
|
890
|
+
case 'p':
|
891
|
+
// public
|
892
|
+
token = IsMatch("public", len, Token::PUBLIC, strict);
|
893
|
+
break;
|
894
|
+
case 'r':
|
895
|
+
// return
|
896
|
+
token = IsMatch("return", len, Token::RETURN);
|
897
|
+
break;
|
898
|
+
case 's':
|
899
|
+
// switch static
|
900
|
+
if (buffer16_[1] == 'w' &&
|
901
|
+
buffer16_[2] == 'i' && buffer16_[3] == 't' &&
|
902
|
+
buffer16_[4] == 'c' && buffer16_[5] == 'h') {
|
903
|
+
// switch
|
904
|
+
token = Token::SWITCH;
|
905
|
+
} else if (strict &&
|
906
|
+
buffer16_[1] == 't' &&
|
907
|
+
buffer16_[2] == 'a' && buffer16_[3] == 't' &&
|
908
|
+
buffer16_[4] == 'i' && buffer16_[5] == 'c') {
|
909
|
+
// static
|
910
|
+
token = Token::STATIC;
|
911
|
+
}
|
912
|
+
break;
|
913
|
+
case 't':
|
914
|
+
// typeof throws
|
915
|
+
if (buffer16_[5] == 'f' &&
|
916
|
+
buffer16_[4] == 'o' && buffer16_[3] == 'e' &&
|
917
|
+
buffer16_[2] == 'p' && buffer16_[1] == 'y') {
|
918
|
+
// typeof
|
919
|
+
token = Token::TYPEOF;
|
920
|
+
} else if (buffer16_[5] == 's' &&
|
921
|
+
buffer16_[4] == 'w' && buffer16_[3] == 'o' &&
|
922
|
+
buffer16_[2] == 'r' && buffer16_[1] == 'h') {
|
923
|
+
// throws (removed)
|
924
|
+
// token = Token::THROWS;
|
925
|
+
}
|
926
|
+
break;
|
927
|
+
}
|
928
|
+
break;
|
929
|
+
case 7:
|
930
|
+
// boolean default extends finally package private
|
931
|
+
// number 0 character is most duplicated
|
932
|
+
switch (buffer16_[0]) {
|
933
|
+
case 'b':
|
934
|
+
// boolean (removed)
|
935
|
+
// token = IsMatch("boolean", len, Token::BOOLEAN);
|
936
|
+
break;
|
937
|
+
case 'd':
|
938
|
+
token = IsMatch("default", len, Token::DEFAULT);
|
939
|
+
break;
|
940
|
+
case 'e':
|
941
|
+
token = IsMatch("extends", len, Token::EXTENDS);
|
942
|
+
break;
|
943
|
+
case 'f':
|
944
|
+
token = IsMatch("finally", len, Token::FINALLY);
|
945
|
+
break;
|
946
|
+
case 'p':
|
947
|
+
if (buffer16_[1] == 'a') {
|
948
|
+
token = IsMatch("package", len, Token::PACKAGE, strict);
|
949
|
+
} else if (buffer16_[1] == 'r') {
|
950
|
+
token = IsMatch("private", len, Token::PRIVATE, strict);
|
951
|
+
}
|
952
|
+
break;
|
953
|
+
}
|
954
|
+
break;
|
955
|
+
case 8:
|
956
|
+
// debugger continue abstract volatile function
|
957
|
+
// number 4 character is most duplicated
|
958
|
+
switch (buffer16_[4]) {
|
959
|
+
case 'g':
|
960
|
+
token = IsMatch("debugger", len, Token::DEBUGGER);
|
961
|
+
break;
|
962
|
+
case 'i':
|
963
|
+
token = IsMatch("continue", len, Token::CONTINUE);
|
964
|
+
break;
|
965
|
+
case 'r':
|
966
|
+
// abstract (removed)
|
967
|
+
// token = IsMatch("abstract", len, Token::ABSTRACT);
|
968
|
+
break;
|
969
|
+
case 't':
|
970
|
+
if (buffer16_[1] == 'o') {
|
971
|
+
// token = IsMatch("volatile", len, Token::VOLATILE);
|
972
|
+
} else if (buffer16_[1] == 'u') {
|
973
|
+
token = IsMatch("function", len, Token::FUNCTION);
|
974
|
+
}
|
975
|
+
break;
|
976
|
+
}
|
977
|
+
break;
|
978
|
+
case 9:
|
979
|
+
// interface protected transient
|
980
|
+
if (buffer16_[1] == 'n') {
|
981
|
+
token = IsMatch("interface", len, Token::INTERFACE, strict);
|
982
|
+
} else if (buffer16_[1] == 'r') {
|
983
|
+
if (buffer16_[0] == 'p') {
|
984
|
+
token = IsMatch("protected", len, Token::PROTECTED, strict);
|
985
|
+
} else if (buffer16_[0] == 't') {
|
986
|
+
// transient (removed)
|
987
|
+
// token = IsMatch("transient", len, Token::TRANSIENT);
|
988
|
+
}
|
989
|
+
}
|
990
|
+
break;
|
991
|
+
case 10:
|
992
|
+
// instanceof implements
|
993
|
+
if (buffer16_[1] == 'n') {
|
994
|
+
token = IsMatch("instanceof", len, Token::INSTANCEOF);
|
995
|
+
} else if (buffer16_[1] == 'm') {
|
996
|
+
token = IsMatch("implements", len, Token::IMPLEMENTS, strict);
|
997
|
+
}
|
998
|
+
break;
|
999
|
+
case 12:
|
1000
|
+
// synchronized (removed)
|
1001
|
+
// token = IsMatch("synchronized", len, Token::SYNCHRONIZED);
|
1002
|
+
token = Token::IDENTIFIER;
|
1003
|
+
break;
|
1004
|
+
}
|
1005
|
+
return token;
|
1006
|
+
}
|
1007
|
+
|
1008
|
+
// Reports Token::GET for "get", Token::SET for "set", and Token::IDENTIFIER
// for any other content of buffer16_.
Token::Type DetectGetOrSet() const {
  if (buffer16_.size() != 3 ||
      buffer16_[1] != 'e' || buffer16_[2] != 't') {
    return Token::IDENTIFIER;
  }
  switch (buffer16_[0]) {
    case 'g':
      return Token::GET;
    case 's':
      return Token::SET;
    default:
      return Token::IDENTIFIER;
  }
}
|
1020
|
+
|
1021
|
+
// Scans a string literal whose opening quote is in c_; the decoded
// characters are stored in buffer16_.
// type_ ends up as NONE (no escapes), ESCAPE (at least one escape) or OCTAL
// (set by ScanEscape for octal escapes).
// Returns Token::STRING, or Token::ILLEGAL when the literal is cut short by
// a line terminator or the end of the source.
Token::Type ScanString() {
  type_ = NONE;
  const uc16 quote = c_;
  buffer16_.clear();
  Advance();
  for (;;) {
    if (c_ == quote) {
      Advance();  // consume the closing quote
      return Token::STRING;
    }
    if (c_ < 0 || Chars::IsLineTerminator(c_)) {
      // not closed
      return Token::ILLEGAL;
    }
    if (c_ != '\\') {
      Record16Advance();
      continue;
    }
    // escape sequence
    Advance();
    if (c_ < 0) {
      return Token::ILLEGAL;
    }
    if (type_ == NONE) {
      type_ = ESCAPE;
    }
    ScanEscape();
  }
}
|
1047
|
+
|
1048
|
+
void ScanEscape() {
|
1049
|
+
if (Chars::IsLineTerminator(c_)) {
|
1050
|
+
SkipLineTerminator();
|
1051
|
+
return;
|
1052
|
+
}
|
1053
|
+
switch (c_) {
|
1054
|
+
case '\'':
|
1055
|
+
case '"' :
|
1056
|
+
case '\\':
|
1057
|
+
Record16Advance();
|
1058
|
+
break;
|
1059
|
+
case 'b' :
|
1060
|
+
Record16('\b');
|
1061
|
+
Advance();
|
1062
|
+
break;
|
1063
|
+
case 'f' :
|
1064
|
+
Record16('\f');
|
1065
|
+
Advance();
|
1066
|
+
break;
|
1067
|
+
case 'n' :
|
1068
|
+
Record16('\n');
|
1069
|
+
Advance();
|
1070
|
+
break;
|
1071
|
+
case 'r' :
|
1072
|
+
Record16('\r');
|
1073
|
+
Advance();
|
1074
|
+
break;
|
1075
|
+
case 't' :
|
1076
|
+
Record16('\t');
|
1077
|
+
Advance();
|
1078
|
+
break;
|
1079
|
+
case 'u' :
|
1080
|
+
Advance();
|
1081
|
+
Record16(ScanHexEscape('u', 4));
|
1082
|
+
break;
|
1083
|
+
case 'v' :
|
1084
|
+
Record16('\v');
|
1085
|
+
Advance();
|
1086
|
+
break;
|
1087
|
+
case 'x' :
|
1088
|
+
Advance();
|
1089
|
+
Record16(ScanHexEscape('x', 2));
|
1090
|
+
break;
|
1091
|
+
case '0' :
|
1092
|
+
case '1' :
|
1093
|
+
case '2' :
|
1094
|
+
case '3' :
|
1095
|
+
case '4' :
|
1096
|
+
case '5' :
|
1097
|
+
case '6' :
|
1098
|
+
case '7' :
|
1099
|
+
if (type_ != OCTAL) {
|
1100
|
+
type_ = OCTAL;
|
1101
|
+
}
|
1102
|
+
Record16(ScanOctalEscape());
|
1103
|
+
break;
|
1104
|
+
|
1105
|
+
default:
|
1106
|
+
Record16Advance();
|
1107
|
+
break;
|
1108
|
+
}
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
// Scans a numeric literal, collecting its text in buffer8_.
// When `period` is true the literal is treated as starting with "0." and the
// scan continues with the fraction digits at c_ (presumably the caller has
// already consumed the leading '.').
// On success stores the value in numeric_, the literal kind (DECIMAL, HEX or
// OCTAL) in type_, and returns Token::NUMBER. Returns Token::ILLEGAL for a
// malformed literal; numeric_ and type_ are left untouched in that case.
Token::Type ScanNumber(const bool period) {
  buffer8_.clear();
  State kind = DECIMAL;

  if (period) {
    Record8('0');
    Record8('.');
    ScanDecimalDigits();
  } else {
    if (c_ == '0') {
      // leading zero: 0x.. (hex), 0.. (octal) or a plain decimal
      Record8Advance();
      if (c_ == 'x' || c_ == 'X') {
        kind = HEX;
        Record8Advance();
        // at least one hex digit must follow the prefix
        if (!Chars::IsHexDigit(c_)) {
          return Token::ILLEGAL;
        }
        do {
          Record8Advance();
        } while (Chars::IsHexDigit(c_));
      } else if (Chars::IsOctalDigit(c_)) {
        // octal literals are converted by hand below, not by strtod
        kind = OCTAL;
        Record8Advance();
        while ('0' <= c_ && c_ <= '7') {
          Record8Advance();
        }
        if (c_ == '8' || c_ == '9') {
          // a non-octal digit turns the whole literal into a decimal one
          kind = DECIMAL;
        }
      }
    }
    if (kind == DECIMAL) {
      ScanDecimalDigits();
      if (c_ == '.') {
        Record8Advance();
        ScanDecimalDigits();
      }
    }
  }

  // exponent part
  if (c_ == 'e' || c_ == 'E') {
    if (kind != DECIMAL) {
      return Token::ILLEGAL;
    }
    Record8Advance();
    if (c_ == '+' || c_ == '-') {
      Record8Advance();
    }
    // at least one decimal digit is required
    if (!Chars::IsDecimalDigit(c_)) {
      return Token::ILLEGAL;
    }
    ScanDecimalDigits();
  }

  // see ECMA-262 section 7.8.3
  // "immediately following a NumericLiteral must not be an IdentifierStart or
  // DecimalDigit."
  if (Chars::IsDecimalDigit(c_) || Chars::IsIdentifierStart(c_)) {
    return Token::ILLEGAL;
  }

  if (kind != OCTAL) {
    Record8('\0');  // Null Terminated String
    numeric_ = std::strtod(buffer8_.data(), NULL);
  } else {
    // accumulate the octal digits manually
    double acc = 0;
    for (std::size_t i = 0, n = buffer8_.size(); i < n; ++i) {
      acc = acc * 8 + (buffer8_[i] - '0');
    }
    numeric_ = acc;
  }
  type_ = kind;
  return Token::NUMBER;
}
|
1196
|
+
|
1197
|
+
// Reads up to three octal digits starting at c_ and returns their value.
// Scanning stops early at the first character OctalValue rejects, or when
// taking one more digit would push the value above 255; the offending
// character is left unconsumed.
uc16 ScanOctalEscape() {
  uc16 value = 0;
  int budget = 3;  // maximum number of digits
  while (budget-- > 0) {
    const int digit = OctalValue(c_);
    if (digit < 0 || value * 8 + digit > 255) {
      break;
    }
    value = value * 8 + digit;
    Advance();
  }
  return value;
}
|
1213
|
+
|
1214
|
+
// Reads exactly `len` hex digits starting at c_ and returns their value.
// If a non-hex character is met first, PushBack() is called once for every
// digit already consumed and `c` is returned instead.
uc16 ScanHexEscape(uc16 c, int len) {
  uc16 value = 0;
  int consumed = 0;
  while (consumed < len) {
    const int digit = HexValue(c_);
    if (digit < 0) {
      // incomplete sequence: undo the digits taken so far
      while (consumed-- > 0) {
        PushBack();
      }
      return c;
    }
    value = value * 16 + digit;
    Advance();
    ++consumed;
  }
  return value;
}
|
1229
|
+
|
1230
|
+
// Returns the numeric value (0-7) of the octal digit `c`, or -1 when `c` is
// not an octal digit. '8' and '9' are rejected: accepting '8' here would let
// an octal escape such as "\18" swallow the '8' as part of the escape.
// The helper reads no instance state, so it is declared static; existing
// calls of the form OctalValue(x) keep working unchanged.
static inline int OctalValue(const int c) {
  if ('0' <= c && c <= '7') {
    return c - '0';
  }
  return -1;
}
|
1236
|
+
|
1237
|
+
// Returns the numeric value (0-15) of the hex digit `c`, accepting both
// lower- and upper-case letters, or -1 when `c` is not a hex digit.
inline int HexValue(const int c) const {
  // The three ranges are checked from the highest code point downwards.
  if (c >= 'a') {
    return (c <= 'f') ? c - 'a' + 10 : -1;
  }
  if (c >= 'A') {
    return (c <= 'F') ? c - 'A' + 10 : -1;
  }
  if (c >= '0') {
    return (c <= '9') ? c - '0' : -1;
  }
  return -1;
}
|
1249
|
+
|
1250
|
+
void ScanDecimalDigits() {
|
1251
|
+
while (Chars::IsDecimalDigit(c_)) {
|
1252
|
+
Record8Advance();
|
1253
|
+
}
|
1254
|
+
}
|
1255
|
+
|
1256
|
+
void SkipLineTerminator() {
|
1257
|
+
const uc16 c = c_;
|
1258
|
+
Advance();
|
1259
|
+
if (c + c_ == '\n' + '\r') {
|
1260
|
+
Advance();
|
1261
|
+
}
|
1262
|
+
++line_number_;
|
1263
|
+
}
|
1264
|
+
|
1265
|
+
BasicSource* source_;
|
1266
|
+
std::vector<char> buffer8_;
|
1267
|
+
std::vector<uc16> buffer16_;
|
1268
|
+
double numeric_;
|
1269
|
+
State type_;
|
1270
|
+
std::size_t pos_;
|
1271
|
+
const std::size_t end_;
|
1272
|
+
bool has_line_terminator_before_next_;
|
1273
|
+
bool has_shebang_;
|
1274
|
+
int c_;
|
1275
|
+
std::size_t line_number_;
|
1276
|
+
Location location_;
|
1277
|
+
};
|
1278
|
+
|
1279
|
+
|
1280
|
+
} } // namespace iv::core
|
1281
|
+
#endif // _IV_LEXER_H_
|