minjs 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,28 +1,36 @@
1
- module Minjs
2
- module Func
3
- #13
1
+ module Minjs::Lex
2
+ # Function
3
+ module Function
4
+ include Minjs
5
+ # Tests next literal is FunctionDeclaration or not.
4
6
  #
5
- # FunctionDeclaration :
6
- # function Identifier ( FormalParameterListopt ) { FunctionBody }
7
+ # If literal is FunctionDeclaration
8
+ # return ECMA262::StFunc object and
9
+ # forward lexical parser position.
10
+ # Otherwise return nil and position is not changed.
7
11
  #
8
- # NOTE:
12
+ # @see http://www.ecma-international.org/ecma-262 ECMA262 13
9
13
  #
10
- # The function declaration in statement(block) is not permitted by ECMA262.
11
- # However, almost all implementation permit it.
14
+ # @note
15
+ # The function declaration in statement(block) is not permitted by ECMA262.
16
+ # However, almost all implementation permit it, so minjs cannot raise
17
+ # exception even if a function declaration appears in a block.
12
18
  #
13
- def func_declaration(lex, context)
19
+ def func_declaration(context)
20
+ # FunctionDeclaration :
21
+ # function Identifier ( FormalParameterListopt ) { FunctionBody }
14
22
  return nil if lex.eql_lit?(ECMA262::ID_FUNCTION).nil?
15
23
 
16
24
  new_context = ECMA262::Context.new
17
25
  new_context.lex_env = context.lex_env.new_declarative_env()
18
26
  new_context.var_env = context.var_env.new_declarative_env()
19
27
 
20
- if id=identifier(lex, context) and
28
+ if id=identifier(context) and
21
29
  lex.eql_lit?(ECMA262::PUNC_LPARENTHESIS) and
22
- args = formal_parameter_list(lex, new_context) and
30
+ args = formal_parameter_list(new_context) and
23
31
  lex.eql_lit?(ECMA262::PUNC_RPARENTHESIS) and
24
32
  lex.eql_lit?(ECMA262::PUNC_LCURLYBRAC) and
25
- b=func_body(lex, new_context) and lex.eql_lit?(ECMA262::PUNC_RCURLYBRAC)
33
+ b=func_body(new_context) and lex.eql_lit?(ECMA262::PUNC_RCURLYBRAC)
26
34
  f = ECMA262::StFunc.new(new_context, id, args, b, {:decl => true})
27
35
 
28
36
  context.var_env.record.create_mutable_binding(id, nil)
@@ -39,25 +47,35 @@ module Minjs
39
47
  end
40
48
  end
41
49
 
42
- #13
50
+ # Tests next literal is FunctionExpression or not.
43
51
  #
44
- # FunctionExpression :
45
- # function Identifieropt ( FormalParameterListopt ) { FunctionBody }
52
+ # If literal is FunctionExpression
53
+ # return ECMA262::StFunc object and
54
+ # forward lexical parser position.
55
+ # Otherwise return nil and position is not changed.
46
56
  #
47
- def func_exp(lex, context)
57
+ # @see http://www.ecma-international.org/ecma-262 ECMA262 13
58
+ #
59
+ # @note
60
+ # The function expression and declaration uses same class
61
+ # for convenience.
62
+ #
63
+ def func_exp(context)
64
+ # FunctionExpression :
65
+ # function Identifieropt ( FormalParameterListopt ) { FunctionBody }
48
66
  return nil if lex.eql_lit?(ECMA262::ID_FUNCTION).nil?
49
67
  @logger.debug "*** func_exp"
50
68
 
51
- id_opt = identifier(lex, context)
69
+ id_opt = identifier(context)
52
70
  new_context = ECMA262::Context.new
53
71
  new_context.lex_env = context.lex_env.new_declarative_env()
54
72
  new_context.var_env = context.var_env.new_declarative_env()
55
73
 
56
74
  if lex.eql_lit?(ECMA262::PUNC_LPARENTHESIS) and
57
- args = formal_parameter_list(lex, new_context) and
75
+ args = formal_parameter_list(new_context) and
58
76
  lex.eql_lit?(ECMA262::PUNC_RPARENTHESIS) and
59
77
  lex.eql_lit?(ECMA262::PUNC_LCURLYBRAC) and
60
- b = func_body(lex, new_context) and lex.eql_lit?(ECMA262::PUNC_RCURLYBRAC)
78
+ b = func_body(new_context) and lex.eql_lit?(ECMA262::PUNC_RCURLYBRAC)
61
79
  f = ECMA262::StFunc.new(new_context, id_opt, args, b)
62
80
  if id_opt
63
81
  new_context.var_env.record.create_mutable_binding(id_opt, nil)
@@ -76,11 +94,11 @@ module Minjs
76
94
  end
77
95
  end
78
96
 
79
- def formal_parameter_list(lex, context)
97
+ def formal_parameter_list(context)
80
98
  ret = []
81
99
  unless lex.peek_lit(nil).eql? ECMA262::PUNC_RPARENTHESIS
82
100
  while true
83
- if arg = identifier(lex, context)
101
+ if arg = identifier(context)
84
102
  ret.push(arg)
85
103
  else
86
104
  raise ParseError.new("unexpceted token", lex)
@@ -103,8 +121,10 @@ module Minjs
103
121
  ret
104
122
  end
105
123
 
106
- def func_body(lex, context)
107
- source_elements(lex, context)
124
+ def func_body(context)
125
+ source_elements(context)
108
126
  end
127
+
128
+ private :func_body, :formal_parameter_list
109
129
  end
110
130
  end
@@ -0,0 +1,1147 @@
1
+ # coding: utf-8
2
+ require 'minjs/ctype'
3
+ require 'minjs/ecma262'
4
+
5
+ module Minjs::Lex
6
+ # ECMA262 Parser class
7
+ #
8
+ # This class parses ECMA262 script language's source text
9
+ # and converts it to elements (ECMA262::Base).
10
+ class Parser
11
+ include Minjs
12
+ include Ctype
13
+ include Lex::Program
14
+ include Lex::Statement
15
+ include Lex::Expression
16
+ include Lex::Function
17
+
18
+ attr_reader :pos
19
+ attr_reader :codes
20
+
21
# Builds a parser over the given source text.
#
# CRLF pairs are converted to LF, and a trailing LF (code point 10) is
# appended when the text does not already end with one. The read position
# starts at 0 with empty literal caches.
#
# @param source_text [String] input source text
# @option options :logger [Logger] logger for debug
def initialize(source_text = "", options = {})
  normalized = source_text.gsub(/\r\n/, "\n")
  @codes = normalized.codepoints
  @codes << 10 unless normalized.end_with?("\n")
  @pos = 0
  clear_cache
  @logger = options[:logger]
  @eval_nest = 0
end
35
+
36
# Returns the parser itself.
#
# @return [Parser] self
def lex
  self
end
40
+
41
# Drops every cached literal together with its recorded "next position".
def clear_cache
  @lit_cache = Hash.new
  @lit_nextpos = Hash.new
end
46
+
47
# Fetches the next input element and forwards the position.
#
# White space and single-line comments are skipped first. A literal that
# was already scanned at the current position is served from the cache.
#
# @param hint [Symbol, nil] one of :regexp, :div or nil. It decides how a
#   leading `/' is read: as a division punctuator (:div) or as a regular
#   expression literal (:regexp). With any other hint a leading `/' yields
#   ECMA262::LIT_DIV_OR_REGEXP_LITERAL and is not consumed.
# @return the scanned element, or nil when nothing matches
def next_input_element(hint)
  cached = @lit_cache[@pos]
  if cached
    @head_pos = @pos = @lit_nextpos[@pos]
    return cached
  end

  start = @pos
  # ECMA262 (5.1.2): simple white space and single-line comments are
  # discarded and never reach the syntactic grammar.
  nil while white_space || single_line_comment

  element = line_terminator || multi_line_comment || token
  unless element
    return nil if @codes[@pos].nil?

    case hint
    when :div
      element = div_punctuator
    when :regexp
      element = regexp_literal
    else
      # Undecided: report the ambiguity without consuming the slash.
      return @codes[@pos] == 0x2f ? ECMA262::LIT_DIV_OR_REGEXP_LITERAL : nil
    end
  end

  if element
    @lit_cache[start] = element
    @lit_nextpos[start] = @pos
  end
  @head_pos = @pos
  element
end
117
+
118
# Tests whether the next literal is WhiteSpace.
#
# When the current code point is white space, the whole run of consecutive
# white space is consumed and a single ECMA262::WhiteSpace element is
# returned. Otherwise nil is returned and the position is not changed.
#
# @return [ECMA262::WhiteSpace] element
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.2
def white_space
  return nil unless white_space?(@codes[@pos])

  @pos += 1
  @pos += 1 while white_space?(@codes[@pos])
  ECMA262::WhiteSpace.get
end
141
+
142
# Tests whether the next literal is LineTerminator.
#
# When the current code point is a line terminator, the whole run of
# consecutive line terminators is consumed and a single
# ECMA262::LineTerminator element is returned. Otherwise nil is returned
# and the position is not changed.
#
# @return [ECMA262::LineTerminator] element
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.3
def line_terminator
  return nil unless line_terminator?(@codes[@pos])

  @pos += 1
  @pos += 1 while line_terminator?(@codes[@pos])
  ECMA262::LineTerminator.get
end
165
+
166
# Tests whether the next literal is a Comment.
#
# Tries a multi-line comment first and a single-line comment second, and
# returns whichever matches. Returns nil when neither matches.
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.4
def comment
  found = multi_line_comment
  found || single_line_comment
end
177
+
178
# Tests whether the next literal is MultiLineComment.
#
# When the input at the current position starts with `/*', the comment is
# consumed up to and including the closing `*/' and an
# ECMA262::MultiLineComment holding the text between the delimiters is
# returned. Otherwise nil is returned and the position is not changed.
#
# @return [ECMA262::MultiLineComment, nil]
# @raise [ParseError] when the input ends before `*/' is found
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.4
def multi_line_comment
  # /*
  return nil unless @codes[@pos] == 0x2f && @codes[@pos + 1] == 0x2a

  @pos += 2
  body_start = @pos
  # Scan for */ ; the end of input is detected on the code point itself so
  # an unterminated comment raises instead of scanning forever.
  until @codes[@pos] == 0x2a && @codes[@pos + 1] == 0x2f
    raise ParseError.new("no `*/' at end of comment", self) if @codes[@pos].nil?
    @pos += 1
  end
  @pos += 2
  ECMA262::MultiLineComment.new(@codes[body_start...(@pos - 2)].pack("U*"))
end
202
+
203
# Tests whether the next literal is SingleLineComment.
#
# When the input at the current position starts with `//', everything up
# to (but not including) the next line terminator or the end of input is
# consumed and returned as an ECMA262::SingleLineComment.
# Otherwise nil is returned and the position is not changed.
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.4
def single_line_comment
  # //
  return nil unless @codes[@pos] == 0x2f && @codes[@pos + 1] == 0x2f

  @pos += 2
  text_start = @pos
  @pos += 1 while (c = @codes[@pos]) && !line_terminator?(c)
  ECMA262::SingleLineComment.new(@codes[text_start...@pos].pack("U*"))
end
224
+
225
# Tests whether the next literal is a Token.
#
# Tries, in this order, an identifier name, a numeric literal, a
# punctuator and a string literal, and returns the first one that matches.
# Returns nil when none of them matches.
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.5
def token
  found = identifier_name
  found ||= numeric_literal
  found ||= punctuator
  found || string_literal
end
236
+
237
# Decodes a `\uXXXX' escape located at the current position.
#
# The caller is expected to have checked that @codes[@pos] is a backslash;
# this method only looks at the code points that follow it and does not
# move the position.
#
# @return [Integer, nil] the escaped code point, or nil when the backslash
#   is not followed by `u'
# @raise [ParseError] when `u' is not followed by four hex digits
def unicode_escape?
  return nil unless @codes[@pos + 1] == 0x75 # u

  quad = (2..5).map { |offset| @codes[@pos + offset] }
  unless quad.all? { |c| hex_digit?(c) }
    raise ParseError.new("bad unicode escpae sequence", self)
  end
  quad.pack("U*").to_i(16)
end
252
+ private :unicode_escape?
253
+
254
# Tests whether the next literal is IdentifierName.
#
# The first character must satisfy identifier_start? and every following
# one identifier_part?. A `\uXXXX' escape counts as the character it
# encodes. On success the name is consumed and an ECMA262::IdentifierName
# is returned. Otherwise nil is returned and the position is not changed.
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.6
def identifier_name
  return nil if @codes[@pos].nil?

  chars = []
  # The first character is held to the stricter "start" rule.
  accepts = lambda do |c|
    chars.empty? ? identifier_start?(c) : identifier_part?(c)
  end

  loop do
    code = @codes[@pos]
    if code == 0x5c && (ucode = unicode_escape?) && accepts.call(ucode)
      chars << ucode
      @pos += 6 # backslash, `u' and four hex digits
    elsif accepts.call(code)
      chars << code
      @pos += 1
    else
      break
    end
  end

  return nil if chars.empty?
  ECMA262::IdentifierName.get(nil, chars.pack("U*").to_sym)
end
291
+
292
# Tests whether the next literal is a Punctuator.
#
# The longest punctuator starting at the current position is consumed and
# its ECMA262::PUNC_* constant is returned. Otherwise nil is returned and
# the position is not changed. `/' and `/=' are not handled here.
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.7
def punctuator
  c0, c1, c2, c3 = @codes[@pos, 4]
  eq = 0x3d # =

  # Each branch yields [number of code points to consume, constant].
  width, punc =
    case c0
    when 0x28 then [1, ECMA262::PUNC_LPARENTHESIS] # (
    when 0x29 then [1, ECMA262::PUNC_RPARENTHESIS] # )
    when 0x7b then [1, ECMA262::PUNC_LCURLYBRAC]   # {
    when 0x7d then [1, ECMA262::PUNC_RCURLYBRAC]   # }
    when 0x3b then [1, ECMA262::PUNC_SEMICOLON]    # ;
    when 0x3d # =
      if c1 == eq && c2 == eq then [3, ECMA262::PUNC_SEQ]    # ===
      elsif c1 == eq          then [2, ECMA262::PUNC_EQ]     # ==
      else                         [1, ECMA262::PUNC_ASSIGN] # =
      end
    when 0x21 # !
      if c1 == eq && c2 == eq then [3, ECMA262::PUNC_SNEQ] # !==
      elsif c1 == eq          then [2, ECMA262::PUNC_NEQ]  # !=
      else                         [1, ECMA262::PUNC_LNOT] # !
      end
    when 0x25 # %
      c1 == eq ? [2, ECMA262::PUNC_MODASSIGN] : [1, ECMA262::PUNC_MOD]
    when 0x26 # &
      if c1 == eq      then [2, ECMA262::PUNC_ANDASSIGN] # &=
      elsif c1 == 0x26 then [2, ECMA262::PUNC_LAND]      # &&
      else                  [1, ECMA262::PUNC_AND]       # &
      end
    when 0x2a # *
      c1 == eq ? [2, ECMA262::PUNC_MULASSIGN] : [1, ECMA262::PUNC_MUL]
    when 0x2b # +
      if c1 == eq      then [2, ECMA262::PUNC_ADDASSIGN] # +=
      elsif c1 == 0x2b then [2, ECMA262::PUNC_INC]       # ++
      else                  [1, ECMA262::PUNC_ADD]       # +
      end
    when 0x2c then [1, ECMA262::PUNC_COMMA] # ,
    when 0x2d # -
      if c1 == eq      then [2, ECMA262::PUNC_SUBASSIGN] # -=
      elsif c1 == 0x2d then [2, ECMA262::PUNC_DEC]       # --
      else                  [1, ECMA262::PUNC_SUB]       # -
      end
    when 0x2e then [1, ECMA262::PUNC_PERIOD] # .
    when 0x3a then [1, ECMA262::PUNC_COLON]  # :
    when 0x3c # <
      if c1 == eq                   then [2, ECMA262::PUNC_LTEQ]         # <=
      elsif c1 == 0x3c && c2 == eq  then [3, ECMA262::PUNC_LSHIFTASSIGN] # <<=
      elsif c1 == 0x3c              then [2, ECMA262::PUNC_LSHIFT]       # <<
      else                               [1, ECMA262::PUNC_LT]           # <
      end
    when 0x3e # >
      if c1 == 0x3e && c2 == 0x3e && c3 == eq then [4, ECMA262::PUNC_URSHIFTASSIGN] # >>>=
      elsif c1 == 0x3e && c2 == 0x3e          then [3, ECMA262::PUNC_URSHIFT]       # >>>
      elsif c1 == 0x3e && c2 == eq            then [3, ECMA262::PUNC_RSHIFTASSIGN]  # >>=
      elsif c1 == 0x3e                        then [2, ECMA262::PUNC_RSHIFT]        # >>
      elsif c1 == eq                          then [2, ECMA262::PUNC_GTEQ]          # >=
      else                                         [1, ECMA262::PUNC_GT]            # >
      end
    when 0x3f then [1, ECMA262::PUNC_CONDIF]  # ?
    when 0x5b then [1, ECMA262::PUNC_LSQBRAC] # [
    when 0x5d then [1, ECMA262::PUNC_RSQBRAC] # ]
    when 0x5e # ^
      c1 == eq ? [2, ECMA262::PUNC_XORASSIGN] : [1, ECMA262::PUNC_XOR]
    when 0x7c # |
      if c1 == 0x7c then [2, ECMA262::PUNC_LOR]      # ||
      elsif c1 == eq then [2, ECMA262::PUNC_ORASSIGN] # |=
      else                [1, ECMA262::PUNC_OR]       # |
      end
    when 0x7e then [1, ECMA262::PUNC_NOT] # ~
    end

  return nil if width.nil?

  @pos += width
  punc
end
470
+
471
# Tests whether the next literal is DivPunctuator.
#
# Consumes `/=' or `/' at the current position and returns
# ECMA262::PUNC_DIVASSIGN or ECMA262::PUNC_DIV respectively.
# Otherwise nil is returned and the position is not changed.
#
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.7
def div_punctuator
  return nil unless @codes[@pos] == 0x2f # /

  if @codes[@pos + 1] == 0x3d # =
    @pos += 2
    ECMA262::PUNC_DIVASSIGN
  else
    @pos += 1
    ECMA262::PUNC_DIV
  end
end
491
+
492
# Tests whether the next literal is a RegExp.
#
# When the current code point is `/', the body and then the flags are
# scanned and returned as an ECMA262::ECMA262RegExp.
# Otherwise nil is returned and the position is not changed.
#
# @return [ECMA262::RegExp]
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.8.5
def regexp_literal
  # RegularExpressionLiteral::
  # / RegularExpressionBody / RegularExpressionFlags
  return nil if @codes[@pos] != 0x2f

  ECMA262::ECMA262RegExp.new(regexp_body, regexp_flags)
end
511
+
512
# Scans a regular expression body.
#
# On entry the position is on the opening `/'. The body is consumed up to
# and including the closing `/'. Backslash escapes and character classes
# (scanned by regexp_class) may contain `/' without ending the body.
#
# @return [String] the text between the two slashes
# @raise [ParseError] when the body starts with `*', contains a line
#   terminator, or the input ends before the closing `/'
def regexp_body
  # The first body character is the one after the opening slash.
  if @codes[@pos + 1] == 0x2a
    raise ParseError.new("first character of regular expression is `*'", self)
  end
  body_start = @pos + 1
  @pos = body_start
  until @codes[@pos] == 0x2f
    code = @codes[@pos]
    if code.nil?
      raise ParseError.new("no `/' end of regular expression", self)
    end
    if line_terminator?(code)
      raise ParseError.new("regular expression has line terminator in body", self)
    end
    if code == 0x5c # \
      @pos += 1
      if line_terminator?(@codes[@pos])
        raise ParseError.new("regular expression has line terminator in body", self)
      end
      @pos += 1
    elsif code == 0x5b # [
      regexp_class
    else
      @pos += 1
    end
  end
  @pos += 1
  @codes[body_start...(@pos - 1)].pack("U*")
end
540
+
541
# Scans a regular expression character class (`[...]').
#
# On entry the position must be on `['. The class is consumed up to and
# including the closing `]'. A backslash escapes the code point after it.
#
# @raise [ParseError] when the position is not on `[', when the class
#   contains a line terminator, or when the input ends before `]'
def regexp_class
  unless @codes[@pos] == 0x5b
    raise ParseError.new('bad regular expression', self)
  end
  @pos += 1
  until @codes[@pos] == 0x5d
    code = @codes[@pos]
    if code.nil?
      raise ParseError.new("no `]' end of regular expression class", self)
    end
    if line_terminator?(code)
      raise ParseError.new("regular expression has line terminator in body", self)
    end
    if code == 0x5c # \
      @pos += 1
      if line_terminator?(@codes[@pos])
        raise ParseError.new("regular expression has line terminator in body", self)
      end
    end
    @pos += 1
  end
  @pos += 1
end
565
+
566
# Scans regular expression flags: the run of identifier-part code points
# at the current position.
#
# @return [String] the flags, possibly empty
def regexp_flags
  flags_start = @pos
  @pos += 1 while identifier_part?(@codes[@pos])
  @codes[flags_start...@pos].pack("U*")
end
573
+
574
+ private :regexp_flags, :regexp_class, :regexp_body
575
+
576
# Tests whether the next literal is NumericLiteral.
#
# Tries, in this order, a hex integer literal, an octal integer literal
# and a decimal literal, and returns the first one that matches.
# Returns nil when none of them matches.
#
# @return [ECMA262::ECMA262Numeric]
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.8.3
def numeric_literal
  found = hex_integer_literal
  found ||= octal_integer_literal
  found || decimal_literal
end
588
+
589
# 7.8.3
#
# HexIntegerLiteral ::
#   0x HexDigit
#   0X HexDigit
#   HexIntegerLiteral HexDigit
#
# Consumes a hex integer literal at the current position and returns it as
# an ECMA262::ECMA262Numeric built from the integer value. Returns nil
# (position unchanged) when the input does not start with `0x' / `0X'.
#
# @raise [ParseError] when the literal is immediately followed by an
#   identifier start character, or when no hex digit follows the prefix
def hex_integer_literal
  marker = @codes[@pos + 1]
  # 0x / 0X
  return nil unless @codes[@pos] == 0x30 && (marker == 0x78 || marker == 0x58)

  @pos += 2
  digits_start = @pos
  @pos += 1 while (code = @codes[@pos]) && hex_digit?(code)

  if identifier_start?(@codes[@pos])
    raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
  end
  # The grammar requires at least one HexDigit; without this check a bare
  # `0x' would be read as the number 0.
  if @pos == digits_start
    raise ParseError.new("no hex digit after `0x'", self)
  end
  ECMA262::ECMA262Numeric.new(@codes[digits_start...@pos].pack("U*").to_i(16))
end
616
+
617
# B.1.1
#
# OctalIntegerLiteral ::
#   0 OctalDigit
#   OctalIntegerLiteral OctalDigit
#
# Consumes a legacy octal integer literal (a `0' followed by at least one
# digit 0-7) and returns it as an ECMA262::ECMA262Numeric built from the
# integer value. Returns nil (position unchanged) otherwise, including
# when `0' is the last code point of the input.
#
# @raise [ParseError] when the literal is immediately followed by an
#   identifier start character
def octal_integer_literal
  # nil-safe so that a `0' at the very end of the input is not an error
  octal = lambda { |c| !c.nil? && c >= 0x30 && c <= 0x37 }
  return nil unless @codes[@pos] == 0x30 && octal.call(@codes[@pos + 1])

  @pos += 1 # skip the leading 0
  digits_start = @pos
  @pos += 1 while octal.call(@codes[@pos])

  if identifier_start?(@codes[@pos])
    raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
  end
  ECMA262::ECMA262Numeric.new(@codes[digits_start...@pos].pack("U*").to_i(8))
end
641
+
642
# 7.8.3
#
# DecimalLiteral ::
#   DecimalIntegerLiteral . DecimalDigitsopt ExponentPartopt
#   . DecimalDigits ExponentPartopt
#   DecimalIntegerLiteral ExponentPartopt
#
# Consumes a decimal literal and returns an ECMA262::ECMA262Numeric built
# from the integer, fraction and exponent digit strings (fraction and
# exponent are nil when absent). A `.' that is not followed by a digit is
# consumed and returned as ECMA262::PUNC_PERIOD. Returns nil when the
# current code point is neither `.' nor a decimal digit.
#
# @raise [ParseError] when the literal is immediately followed by an
#   identifier start character
def decimal_literal
  start = @pos
  lead = @codes[@pos]
  return nil if lead.nil?

  if lead == 0x2e # .
    @pos += 1
    fraction = decimal_digits
    if fraction.nil? # this period is a punctuator
      @pos = start + 1
      return ECMA262::PUNC_PERIOD
    end
    integer = '0'
  elsif lead >= 0x30 && lead <= 0x39
    if lead == 0x30 # a leading zero stands alone
      integer = "0"
      @pos += 1
    else
      integer = decimal_digits
    end
    if @codes[@pos] == 0x2e # .
      @pos += 1
      fraction = decimal_digits
    end
  else
    return nil
  end

  marker = @codes[@pos]
  if marker == 0x65 || marker == 0x45 # e or E
    @pos += 1
    exponent = exponent_part
  end

  if identifier_start?(@codes[@pos])
    raise ParseError.new("The source character immediately following a NumericLiteral must not be an IdentifierStart or DecimalDigit", self)
  end

  ECMA262::ECMA262Numeric.new(integer, fraction, exponent)
end
712
+
713
# 7.8.3
#
# ExponentPart ::
#   ExponentIndicator SignedInteger
#
# Scans the signed integer that follows an exponent indicator; the caller
# has already consumed the indicator.
#
# @return [String] the exponent digits, prefixed with `-' when negative
# @raise [ParseError] when no decimal digit follows the optional sign
def exponent_part
  sign = @codes[@pos]
  @pos += 1 if sign == 0x2b || sign == 0x2d # + or -

  digits = decimal_digits
  raise ParseError.new("unexpecting token", self) if digits.nil?

  sign == 0x2d ? "-#{digits}" : digits
end
734
+
735
# 7.8.3
#
# DecimalDigit :: one of
#   0 1 2 3 4 5 6 7 8 9
#
# Consumes the run of decimal digits at the current position.
#
# @return [String, nil] the digits, or nil (position unchanged) when the
#   current code point is not a decimal digit or the input has ended
def decimal_digits
  digits_start = @pos
  # The nil check makes the end of input a plain "no digits" result.
  @pos += 1 while (code = @codes[@pos]) && code >= 0x30 && code <= 0x39
  @pos > digits_start ? @codes[digits_start...@pos].pack("U*") : nil
end
752
+ private :hex_integer_literal, :octal_integer_literal, :decimal_literal,
753
+ :exponent_part, :decimal_digits
754
+
755
# Tests whether the next literal is StringLiteral.
#
# When the current code point is `'' or `"', the string is consumed up to
# and including the matching closing quote and returned as an
# ECMA262::ECMA262String holding the decoded text. Escape sequences are
# decoded by escape_sequence; a line continuation contributes nothing.
# Otherwise nil is returned and the position is not changed.
#
# @return [ECMA262::ECMA262String]
# @raise [ParseError] when the string contains an unescaped line
#   terminator or the input ends before the closing quote
# @see http://www.ecma-international.org/ecma-262 ECMA262 7.8.4
#
def string_literal
  # StringLiteral ::
  #   " DoubleStringCharactersopt "
  #   ' SingleStringCharactersopt '
  #
  # DoubleStringCharacter / SingleStringCharacter ::
  #   SourceCharacter but not one of the quote or \ or LineTerminator
  #   \ EscapeSequence
  #   LineContinuation
  #
  term = @codes[@pos]
  return nil unless term == 0x27 || term == 0x22 # ' or "

  @pos += 1
  str = []
  loop do
    code = @codes[@pos]
    if code.nil?
      # Reachable when a line continuation swallowed the final newline.
      raise ParseError.new("no `#{[term].pack('U')}' at end of string", self)
    elsif line_terminator?(code)
      raise ParseError.new("string has line terminator in body", self)
    elsif code == 0x5c # \
      @pos += 1
      str.push(escape_sequence)
    elsif code == term
      @pos += 1
      # compact drops the nil produced by a line continuation
      return ECMA262::ECMA262String.new(str.compact.pack("U*"))
    else
      @pos += 1
      str.push(code)
    end
  end
end
815
+
816
# 7.8.4
# B.1.2
#
# EscapeSequence ::
#   CharacterEscapeSequence
#   0 [lookahead ∉ DecimalDigit]
#   HexEscapeSequence
#   UnicodeEscapeSequence
#   OctalEscapeSequence
#
# Decodes the escape whose first code point (the one after the backslash)
# is at the current position, and forwards the position past it.
#
# @return [Integer, nil] the decoded code point; nil for a line
#   continuation (backslash followed by a line terminator)
def escape_sequence
  code = @codes[@pos]

  single =
    case code
    when 0x27, 0x22, 0x5c then code # ' " \
    when 0x62 then 0x08 # b
    when 0x74 then 0x09 # t
    when 0x6e then 0x0a # n
    when 0x76 then 0x0b # v
    when 0x66 then 0x0c # f
    when 0x72 then 0x0d # r
    end
  if single
    @pos += 1
    return single
  end

  if code == 0x78 || code == 0x75 # x (2 hex digits) / u (4 hex digits)
    # NOTE(review): the digits are not validated; non-hex input is read by
    # String#to_i(16), which stops at the first invalid character.
    width = code == 0x78 ? 2 : 4
    value = @codes[(@pos + 1)..(@pos + width)].pack("U*").to_i(16)
    @pos += width + 1
    return value
  end

  # line continuation
  if line_terminator?(code)
    @pos += 1
    return nil
  end

  # Any other non-octal character stands for itself.
  unless octal_digit?(code)
    @pos += 1
    return code
  end

  # Annex B.1.2
  #
  # OctalEscapeSequence ::
  #   OctalDigit [lookahead ∉ DecimalDigit]
  #   ZeroToThree OctalDigit [lookahead ∉ DecimalDigit]
  #   FourToSeven OctalDigit
  #   ZeroToThree OctalDigit OctalDigit
  #
  # Strings such as \19 or \319 are invalid octal escapes, but most
  # implementations accept them, so the longest valid prefix is taken
  # here as well.
  digits = [code]
  second = @codes[@pos + 1]
  if octal_digit?(second)
    digits << second
    third = @codes[@pos + 2]
    # A third digit is only allowed when the first one is 0..3.
    digits << third if code >= 0x30 && code <= 0x33 && octal_digit?(third)
  end
  @pos += digits.size
  digits.inject(0) { |acc, d| acc * 8 + (d - 0x30) }
end
922
+ private :escape_sequence
923
+
924
# Returns true if the position is at the end of file, that is, when no
# literal other than white space and line terminators remains.
def eof?
  remaining = peek_lit(nil)
  remaining.nil?
end
928
+
929
#
# Checks whether the next literal is strictly equal (eql?) to _l_.
# White spaces and line terminators are skipped and ignored.
#
# If the next literal is _l_ the position is forwarded and the literal is
# returned; otherwise nil is returned and the position is not forwarded.
#
def eql_lit?(l, hint = nil)
  lit = peek_lit(hint)
  return nil unless lit.eql?(l)

  fwd_after_peek
  lit
end
945
+
946
#
# Checks whether the next literal is strictly equal (eql?) to _l_.
# White spaces are skipped and ignored; line terminators are not.
#
# If the next literal is _l_ the position is forwarded and the literal is
# returned; otherwise nil is returned and the position is not forwarded.
#
def eql_lit_nolt?(l, hint = nil)
  lit = peek_lit_nolt(hint)
  return nil unless lit.eql?(l)

  fwd_after_peek
  lit
end
963
+
964
#
# Checks whether the next literal is equal (==) to _l_.
# White spaces and line terminators are skipped and ignored.
#
# If the next literal is _l_ the position is forwarded and the literal is
# returned; otherwise nil is returned and the position is not forwarded.
#
def match_lit?(l, hint = nil)
  lit = peek_lit(hint)
  return nil unless lit == l

  fwd_after_peek
  lit
end
980
+
981
#
# Checks whether the next literal is equal (==) to _l_.
# White spaces are skipped and ignored; line terminators are not.
#
# If the next literal is _l_ the position is forwarded and the literal is
# returned; otherwise nil is returned and the position is not forwarded.
#
def match_lit_nolt?(l, hint = nil)
  lit = peek_lit_nolt(hint)
  return nil unless lit == l

  fwd_after_peek
  lit
end
998
+
999
#
# Fetches the next literal without forwarding the position.
# White spaces and line terminators are skipped and ignored.
#
def peek_lit(hint)
  saved = @pos
  lit = next_input_element(hint)
  lit = next_input_element(hint) while lit && (lit.ws? || lit.lt?)
  @pos = saved
  lit
end
1011
+
1012
# Fetches the next literal without forwarding the position.
#
# White spaces are skipped and ignored.
# Line terminators are not ignored.
#
def peek_lit_nolt(hint)
  saved = @pos
  lit = next_input_element(hint)
  lit = next_input_element(hint) while lit && lit.ws?
  @pos = saved
  lit
end
1025
+
1026
# Forwards the position after a call to peek_lit.
#
# The position is moved to @head_pos, the position recorded by the last
# next_input_element call, so the literal need not be scanned again.
def fwd_after_peek
  @pos = @head_pos
end
1032
+
1033
#
# Fetches the next literal and forwards the position past it.
# White spaces and line terminators are skipped and ignored.
#
def fwd_lit(hint)
  lit = next_input_element(hint)
  lit = next_input_element(hint) while lit && (lit.ws? || lit.lt?)
  lit
end
1043
+
1044
#
# Fetches the next literal and forwards the position past it.
# White spaces are skipped and ignored.
# Line terminators are not ignored.
#
def fwd_lit_nolt(hint)
  lit = next_input_element(hint)
  lit = next_input_element(hint) while lit && lit.ws?
  lit
end
1055
+
1056
#
# Runs the given block as a speculative parse.
#
# When the block yields nil, or is left through `break', `return' or an
# exception, the position is rewound to where it was on entry. When it
# yields a non-nil value the position is kept, and the literal cache is
# cleared once the outermost eval_lit call finishes.
#
#   break <val>  => position is rewound, then break with <val>
#   return <val> => position is rewound, then return <val>
#   next <val>   => position is not rewound, then break with <val>
#
def eval_lit(&block)
  saved_pos = @pos
  @eval_nest += 1
  ret = yield
ensure
  @eval_nest -= 1
  if ret.nil?
    @pos = saved_pos
  elsif @eval_nest == 0
    clear_cache
  end
end
1079
+
1080
#
# Converts a position to [row, col].
#
# Both values are 1-based. The column counts code points since the most
# recent line terminator before _pos_.
#
# @param pos [Integer] index into the code point array
# @return [Array<Integer>] row and column
#
def row_col(pos)
  row = 0
  # Start at 0 like every later row, which resets to 0 after a line
  # terminator; starting at 1 made first-row columns one too large.
  col = 0
  @codes.each_with_index do |code, index|
    break if index >= pos
    if line_terminator?(code)
      row += 1
      col = 0
    else
      col += 1
    end
  end
  [row + 1, col + 1]
end
1099
+
1100
#
# Returns the source line that contains _pos_.
#
# The line runs from the code point after the previous line terminator
# (or the start of the input) up to and including the next line
# terminator (or the end of the input).
#
# @param pos [Integer] index into the code point array
# @return [String] the line text
#
def line(pos)
  last = @codes.length
  pos = [[pos, 0].max, last].min

  # Both scans are bounded so a position at the very end of the input
  # cannot run past the array.
  first = pos
  first -= 1 while first > 0 && !line_terminator?(@codes[first - 1])

  stop = pos
  stop += 1 while stop < last && !line_terminator?(@codes[stop])

  @codes[first..stop].pack("U*")
end
1118
+
1119
# Returns a string showing the input around _pos_ with a `^' marker on
# the following line.
#
# When _pos_ is nil the position recorded by the last scan (@head_pos) is
# used, falling back to the current position when none was recorded yet.
#
# @param pos position
# @param row row (not used by this method)
# @param col column; a nil column is treated as 0
# @return [String] string
#
def debug_str(pos = nil, row = 0, col = 0)
  # `||' rather than `or': `pos = @head_pos or @pos' never assigned @pos.
  pos = @head_pos || @pos if pos.nil?
  col ||= 0

  if col >= 80
    # Long line: show a window of code points around the position instead.
    excerpt = @codes[(pos - 80)..(pos + 80)].pack("U*")
    col = 81
  else
    excerpt = line(pos)
  end

  col -= 1 if col >= 1
  "#{excerpt}\n#{' ' * col}^"
end
1146
+ end
1147
+ end