rubinius-melbourne 3.6 → 3.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -32,6 +32,8 @@ namespace MELBOURNE {
32
32
  #define TRUE true
33
33
  #define FALSE false
34
34
 
35
+ #define TAB_WIDTH 8
36
+
35
37
  #define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
36
38
 
37
39
  static void parser_prepare(rb_parser_state*);
@@ -54,6 +56,8 @@ static int parser_yyerror(rb_parser_state*, const char *);
54
56
  ((id)&ID_SCOPE_MASK) == ID_INSTANCE || \
55
57
  ((id)&ID_SCOPE_MASK) == ID_CLASS))
56
58
 
59
+ # define SET_LEX_STATE(ls) (lex_state = (lex_state_e)(ls))
60
+
57
61
  static int yylex(void*, void *);
58
62
 
59
63
  #define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1))
@@ -71,6 +75,9 @@ static int yylex(void*, void *);
71
75
  #define CMDARG_LEXPOP() BITSTACK_LEXPOP(cmdarg_stack)
72
76
  #define CMDARG_P() BITSTACK_SET_P(cmdarg_stack)
73
77
 
78
+ static int parser_arg_ambiguous(rb_parser_state*, char);
79
+ #define arg_ambiguous(c) parser_arg_ambiguous(parser_state, c)
80
+
74
81
  static void parser_token_info_push(rb_parser_state*, const char *);
75
82
  static void parser_token_info_pop(rb_parser_state*, const char *);
76
83
  #define token_info_push(token) (RTEST(ruby_verbose) \
@@ -126,11 +133,11 @@ static bool parser_in_block(rb_parser_state*);
126
133
  static bool parser_bv_defined(rb_parser_state*, ID);
127
134
  static int parser_bv_var(rb_parser_state*, ID);
128
135
  static NODE *parser_aryset(rb_parser_state*, NODE*, NODE*);
129
- static NODE *parser_attrset(rb_parser_state*, NODE*, ID);
136
+ static NODE *parser_attrset(rb_parser_state*, NODE*, ID, ID);
130
137
  static void rb_parser_backref_error(rb_parser_state*, NODE*);
131
138
  static NODE *parser_node_assign(rb_parser_state*, NODE*, NODE*);
132
139
  static NODE *parser_new_op_assign(rb_parser_state*, NODE*, ID, NODE*);
133
- static NODE *parser_new_attr_op_assign(rb_parser_state*, NODE*, ID, ID, NODE*);
140
+ static NODE *parser_new_attr_op_assign(rb_parser_state*, NODE*, ID, ID, ID, NODE*);
134
141
  static NODE *parser_new_const_op_assign(rb_parser_state*, NODE*, ID, NODE*);
135
142
 
136
143
  static NODE *parser_match_op(rb_parser_state*, NODE*, NODE*);
@@ -144,6 +151,9 @@ static bool parser_local_id(rb_parser_state*, ID);
144
151
  static ID* parser_local_tbl(rb_parser_state*);
145
152
  static ID convert_op(ID id);
146
153
 
154
+ static void parser_heredoc_dedent(rb_parser_state*, NODE*);
155
+ #define heredoc_dedent(str) parser_heredoc_dedent(parser_state, (str))
156
+
147
157
  rb_parser_state *parser_alloc_state() {
148
158
  rb_parser_state *parser_state = (rb_parser_state*)calloc(1, sizeof(rb_parser_state));
149
159
 
@@ -165,6 +175,9 @@ rb_parser_state *parser_alloc_state() {
165
175
  brace_nest = 0;
166
176
  compile_for_eval = 0;
167
177
  cur_mid = 0;
178
+ heredoc_end = 0;
179
+ heredoc_indent = 0;
180
+ heredoc_line_indent = 0;
168
181
  tokenbuf = NULL;
169
182
  tokidx = 0;
170
183
  toksiz = 0;
@@ -187,7 +200,7 @@ rb_parser_state *parser_alloc_state() {
187
200
  return parser_state;
188
201
  }
189
202
 
190
- void *pt_allocate(rb_parser_state *parser_state, int size) {
203
+ void *pt_allocate(rb_parser_state* parser_state, int size) {
191
204
  void *cur;
192
205
 
193
206
  if(!memory_cur || ((memory_cur + size) >= memory_last_addr)) {
@@ -212,7 +225,7 @@ void *pt_allocate(rb_parser_state *parser_state, int size) {
212
225
  return cur;
213
226
  }
214
227
 
215
- void pt_free(rb_parser_state *parser_state) {
228
+ void pt_free(rb_parser_state* parser_state) {
216
229
  int i;
217
230
 
218
231
  free(tokenbuf);
@@ -311,7 +324,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
311
324
  #define list_append(l, i) parser_list_append(parser_state, l, i)
312
325
  #define node_assign(a, b) parser_node_assign(parser_state, a, b)
313
326
  #define new_op_assign(l, o, r) parser_new_op_assign(parser_state, l, o, r)
314
- #define new_attr_op_assign(l,a,o,r) parser_new_attr_op_assign(parser_state, l, a, o, r)
327
+ #define new_attr_op_assign(l,t,a,o,r) parser_new_attr_op_assign(parser_state, l, t, a, o, r)
315
328
  #define new_const_op_assign(l,o,r) parser_new_const_op_assign(parser_state, l, o, r)
316
329
  #define call_bin_op(a, s, b) parser_call_bin_op(parser_state, a, s, b)
317
330
  #define call_uni_op(n, s) parser_call_uni_op(parser_state, n, s)
@@ -330,7 +343,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
330
343
  #define bv_defined(n) parser_bv_defined(parser_state, n)
331
344
  #define bv_var(n) parser_bv_var(parser_state, n)
332
345
  #define aryset(a, b) parser_aryset(parser_state, a, b)
333
- #define attrset(a, b) parser_attrset(parser_state, a, b)
346
+ #define attrset(n, q, id) parser_attrset(parser_state, n, q, id)
334
347
  #define match_op(a, b) parser_match_op(parser_state, a, b)
335
348
  #define new_yield(n) parser_new_yield(parser_state, n)
336
349
  #define dsym_node(n) parser_dsym_node(parser_state, n)
@@ -406,7 +419,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
406
419
  #define STR_NEW3(p,n,e,func) parser_str_new(parser_state, (p), (n), (e), \
407
420
  (func), parser_state->enc)
408
421
  #define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
409
- #define TOK_INTERN(mb) parser_intern3(tok(), toklen(), parser_state->enc)
422
+ #define TOK_INTERN() parser_intern3(tok(), toklen(), parser_state->enc)
410
423
 
411
424
  #define NEW_BLOCK_VAR(b, v) NEW_NODE(NODE_BLOCK_PASS, 0, b, v)
412
425
  #define NEW_REQ_KW NEW_LIT(ID2SYM(parser_intern("*")))
@@ -513,8 +526,9 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
513
526
  %type <node> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner
514
527
  %type <id> fsym keyword_variable user_variable sym symbol operation operation2 operation3
515
528
  %type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg
516
- %type <id> f_kwrest f_label
529
+ %type <id> f_kwrest f_label f_arg_asgn call_op call_op2
517
530
 
531
+ %token END_OF_INPUT 0 "end-of-input"
518
532
  %token tUPLUS /* unary+ */
519
533
  %token tUMINUS /* unary- */
520
534
  %token tPOW /* ** */
@@ -524,11 +538,17 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
524
538
  %token tNEQ /* != */
525
539
  %token tGEQ /* >= */
526
540
  %token tLEQ /* <= */
527
- %token tANDOP tOROP /* && and || */
528
- %token tMATCH tNMATCH /* =~ and !~ */
529
- %token tDOT2 tDOT3 /* .. and ... */
530
- %token tAREF tASET /* [] and []= */
531
- %token tLSHFT tRSHFT /* << and >> */
541
+ %token tANDOP /* && */
542
+ %token tOROP /* || */
543
+ %token tMATCH /* =~ */
544
+ %token tNMATCH /* !~ */
545
+ %token tDOT2 /* .. */
546
+ %token tDOT3 /* ... */
547
+ %token tAREF /* [] */
548
+ %token tASET /* []= */
549
+ %token tLSHFT /* << */
550
+ %token tRSHFT /* >> */
551
+ %token tANDDOT /* &. */
532
552
  %token tCOLON2 /* :: */
533
553
  %token tCOLON3 /* :: at EXPR_BEG */
534
554
  %token <id> tOP_ASGN /* +=, -= etc. */
@@ -538,9 +558,9 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
538
558
  %token tRPAREN /* ) */
539
559
  %token tLBRACK /* [ */
540
560
  %token tLBRACE /* { */
541
- %token tLBRACE_ARG /* { */
561
+ %token tLBRACE_ARG /* { arg */
542
562
  %token tSTAR /* * */
543
- %token tDSTAR /* ** */
563
+ %token tDSTAR /* **arg */
544
564
  %token tAMPER /* & */
545
565
  %token tLAMBDA /* -> */
546
566
  %token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG tSYMBOLS_BEG tQSYMBOLS_BEG
@@ -578,7 +598,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
578
598
 
579
599
  %%
580
600
  program : {
581
- lex_state = EXPR_BEG;
601
+ SET_LEX_STATE(EXPR_BEG);
582
602
  local_push(0);
583
603
  class_nest = 0;
584
604
  }
@@ -702,7 +722,7 @@ stmt_or_begin : stmt
702
722
  }
703
723
  ;
704
724
 
705
- stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem
725
+ stmt : keyword_alias fitem {SET_LEX_STATE(EXPR_FNAME | EXPR_FITEM);} fitem
706
726
  {
707
727
  $$ = NEW_ALIAS($2, $4);
708
728
  }
@@ -795,15 +815,15 @@ stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem
795
815
  $$ = NEW_OP_ASGN1($1, $5, args);
796
816
  fixpos($$, $1);
797
817
  }
798
- | primary_value '.' tIDENTIFIER tOP_ASGN command_call
818
+ | primary_value call_op tIDENTIFIER tOP_ASGN command_call
799
819
  {
800
820
  value_expr($5);
801
- $$ = new_attr_op_assign($1, $3, $4, $5);
821
+ $$ = new_attr_op_assign($1, $2, $3, $4, $5);
802
822
  }
803
- | primary_value '.' tCONSTANT tOP_ASGN command_call
823
+ | primary_value call_op tCONSTANT tOP_ASGN command_call
804
824
  {
805
825
  value_expr($5);
806
- $$ = new_attr_op_assign($1, $3, $4, $5);
826
+ $$ = new_attr_op_assign($1, $2, $3, $4, $5);
807
827
  }
808
828
  | primary_value tCOLON2 tCONSTANT tOP_ASGN command_call
809
829
  {
@@ -813,7 +833,7 @@ stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem
813
833
  | primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call
814
834
  {
815
835
  value_expr($5);
816
- $$ = new_attr_op_assign($1, $3, $4, $5);
836
+ $$ = new_attr_op_assign($1, parser_intern("::"), $3, $4, $5);
817
837
  }
818
838
  | backref tOP_ASGN command_call
819
839
  {
@@ -878,9 +898,9 @@ command_call : command
878
898
  ;
879
899
 
880
900
  block_command : block_call
881
- | block_call dot_or_colon operation2 command_args
901
+ | block_call call_op2 operation2 command_args
882
902
  {
883
- $$ = NEW_CALL($1, $3, $4);
903
+ $$ = NEW_QCALL($2, $1, $3, $4);
884
904
  }
885
905
  ;
886
906
 
@@ -919,15 +939,15 @@ command : fcall command_args %prec tLOWEST
919
939
  $$ = $3;
920
940
  fixpos($$, $1);
921
941
  }
922
- | primary_value '.' operation2 command_args %prec tLOWEST
942
+ | primary_value call_op operation2 command_args %prec tLOWEST
923
943
  {
924
- $$ = NEW_CALL($1, $3, $4);
944
+ $$ = NEW_QCALL($2, $1, $3, $4);
925
945
  fixpos($$, $1);
926
946
  }
927
- | primary_value '.' operation2 command_args cmd_brace_block
947
+ | primary_value call_op operation2 command_args cmd_brace_block
928
948
  {
929
949
  block_dup_check($4, $5);
930
- $5->nd_iter = NEW_CALL($1, $3, $4);
950
+ $5->nd_iter = NEW_QCALL($2, $1, $3, $4);
931
951
  $$ = $5;
932
952
  fixpos($$, $1);
933
953
  }
@@ -1062,17 +1082,17 @@ mlhs_node : user_variable
1062
1082
  {
1063
1083
  $$ = aryset($1, $3);
1064
1084
  }
1065
- | primary_value '.' tIDENTIFIER
1085
+ | primary_value call_op tIDENTIFIER
1066
1086
  {
1067
- $$ = attrset($1, $3);
1087
+ $$ = attrset($1, $2, $3);
1068
1088
  }
1069
1089
  | primary_value tCOLON2 tIDENTIFIER
1070
1090
  {
1071
- $$ = attrset($1, $3);
1091
+ $$ = attrset($1, parser_intern("::"), $3);
1072
1092
  }
1073
- | primary_value '.' tCONSTANT
1093
+ | primary_value call_op tCONSTANT
1074
1094
  {
1075
- $$ = attrset($1, $3);
1095
+ $$ = attrset($1, $2, $3);
1076
1096
  }
1077
1097
  | primary_value tCOLON2 tCONSTANT
1078
1098
  {
@@ -1107,17 +1127,17 @@ lhs : user_variable
1107
1127
  {
1108
1128
  $$ = aryset($1, $3);
1109
1129
  }
1110
- | primary_value '.' tIDENTIFIER
1130
+ | primary_value call_op tIDENTIFIER
1111
1131
  {
1112
- $$ = attrset($1, $3);
1132
+ $$ = attrset($1, $2, $3);
1113
1133
  }
1114
1134
  | primary_value tCOLON2 tIDENTIFIER
1115
1135
  {
1116
- $$ = attrset($1, $3);
1136
+ $$ = attrset($1, parser_intern("::"), $3);
1117
1137
  }
1118
- | primary_value '.' tCONSTANT
1138
+ | primary_value call_op tCONSTANT
1119
1139
  {
1120
- $$ = attrset($1, $3);
1140
+ $$ = attrset($1, $2, $3);
1121
1141
  }
1122
1142
  | primary_value tCOLON2 tCONSTANT
1123
1143
  {
@@ -1164,12 +1184,12 @@ fname : tIDENTIFIER
1164
1184
  | tFID
1165
1185
  | op
1166
1186
  {
1167
- lex_state = EXPR_ENDFN;
1187
+ SET_LEX_STATE(EXPR_ENDFN);
1168
1188
  $$ = convert_op($1);
1169
1189
  }
1170
1190
  | reswords
1171
1191
  {
1172
- lex_state = EXPR_ENDFN;
1192
+ SET_LEX_STATE(EXPR_ENDFN);
1173
1193
  $$ = $<id>1;
1174
1194
  }
1175
1195
  ;
@@ -1189,7 +1209,7 @@ undef_list : fitem
1189
1209
  {
1190
1210
  $$ = NEW_UNDEF($1);
1191
1211
  }
1192
- | undef_list ',' {lex_state = EXPR_FNAME;} fitem
1212
+ | undef_list ',' {SET_LEX_STATE(EXPR_FNAME | EXPR_FITEM);} fitem
1193
1213
  {
1194
1214
  $$ = block_append($1, NEW_UNDEF($4));
1195
1215
  }
@@ -1284,20 +1304,20 @@ arg : lhs '=' arg
1284
1304
  $$ = NEW_OP_ASGN1($1, $5, args);
1285
1305
  fixpos($$, $1);
1286
1306
  }
1287
- | primary_value '.' tIDENTIFIER tOP_ASGN arg
1307
+ | primary_value call_op tIDENTIFIER tOP_ASGN arg
1288
1308
  {
1289
1309
  value_expr($5);
1290
- $$ = new_attr_op_assign($1, $3, $4, $5);
1310
+ $$ = new_attr_op_assign($1, $2, $3, $4, $5);
1291
1311
  }
1292
- | primary_value '.' tCONSTANT tOP_ASGN arg
1312
+ | primary_value call_op tCONSTANT tOP_ASGN arg
1293
1313
  {
1294
1314
  value_expr($5);
1295
- $$ = new_attr_op_assign($1, $3, $4, $5);
1315
+ $$ = new_attr_op_assign($1, $2, $3, $4, $5);
1296
1316
  }
1297
1317
  | primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg
1298
1318
  {
1299
1319
  value_expr($5);
1300
- $$ = new_attr_op_assign($1, $3, $4, $5);
1320
+ $$ = new_attr_op_assign($1, parser_intern("::"), $3, $4, $5);
1301
1321
  }
1302
1322
  | primary_value tCOLON2 tCONSTANT tOP_ASGN arg
1303
1323
  {
@@ -1521,12 +1541,12 @@ call_args : command
1521
1541
  }
1522
1542
  | assocs opt_block_arg
1523
1543
  {
1524
- $$ = NEW_LIST(NEW_HASH($1));
1544
+ $$ = NEW_LIST($1 ? NEW_HASH($1) : 0);
1525
1545
  $$ = arg_blk_pass($$, $2);
1526
1546
  }
1527
1547
  | args ',' assocs opt_block_arg
1528
1548
  {
1529
- $$ = arg_append($1, NEW_HASH($3));
1549
+ $$ = $3 ? arg_append($1, NEW_HASH($3)) : $1;
1530
1550
  $$ = arg_blk_pass($$, $4);
1531
1551
  }
1532
1552
  | block_arg
@@ -1650,13 +1670,19 @@ primary : literal
1650
1670
  }
1651
1671
  nd_set_line($$, $<num>2);
1652
1672
  }
1653
- | tLPAREN_ARG {lex_state = EXPR_ENDARG;} rparen
1673
+ | tLPAREN_ARG {SET_LEX_STATE(EXPR_ENDARG);} rparen
1654
1674
  {
1655
1675
  $$ = 0;
1656
1676
  }
1657
- | tLPAREN_ARG expr {lex_state = EXPR_ENDARG;} rparen
1677
+ | tLPAREN_ARG
1658
1678
  {
1659
- $$ = $2;
1679
+ $<val>1 = cmdarg_stack;
1680
+ cmdarg_stack = 0;
1681
+ }
1682
+ expr {SET_LEX_STATE(EXPR_ENDARG);} rparen
1683
+ {
1684
+ cmdarg_stack = $<val>1;
1685
+ $$ = $3;
1660
1686
  }
1661
1687
  | tLPAREN compstmt ')'
1662
1688
  {
@@ -1861,11 +1887,14 @@ primary : literal
1861
1887
  in_def--;
1862
1888
  cur_mid = $<id>3;
1863
1889
  }
1864
- | k_def singleton dot_or_colon {lex_state = EXPR_FNAME;} fname
1890
+ | k_def singleton dot_or_colon {SET_LEX_STATE(EXPR_FNAME);} fname
1865
1891
  {
1866
- in_single++;
1867
- lex_state = EXPR_ENDFN; /* force for args */
1892
+ $<num>4 = in_single;
1893
+ in_single = 1;
1894
+ SET_LEX_STATE(EXPR_ENDFN | EXPR_LABEL); /* force for args */
1868
1895
  local_push(0);
1896
+ $<id>$ = current_arg;
1897
+ current_arg = 0;
1869
1898
  }
1870
1899
  f_arglist
1871
1900
  bodystmt
@@ -1875,7 +1904,8 @@ primary : literal
1875
1904
  $$ = NEW_DEFS($2, $5, $7, body);
1876
1905
  nd_set_line($$, $<num>1);
1877
1906
  local_pop();
1878
- in_single--;
1907
+ in_single = $<num>4 & 1;
1908
+ current_arg = $<id>6;
1879
1909
  }
1880
1910
  | keyword_break
1881
1911
  {
@@ -2179,7 +2209,7 @@ opt_bv_decl : opt_nl
2179
2209
  {
2180
2210
  $$ = 0;
2181
2211
  }
2182
- | opt_nl ';' bv_decls
2212
+ | opt_nl ';' bv_decls opt_nl
2183
2213
  {
2184
2214
  // This is deliberately different than MRI.
2185
2215
  $$ = $3;
@@ -2276,21 +2306,21 @@ block_call : command do_block
2276
2306
  $$ = $2;
2277
2307
  fixpos($$, $1);
2278
2308
  }
2279
- | block_call dot_or_colon operation2 opt_paren_args
2309
+ | block_call call_op2 operation2 opt_paren_args
2280
2310
  {
2281
- $$ = NEW_CALL($1, $3, $4);
2311
+ $$ = NEW_QCALL($2, $1, $3, $4);
2282
2312
  }
2283
- | block_call dot_or_colon operation2 opt_paren_args brace_block
2313
+ | block_call call_op2 operation2 opt_paren_args brace_block
2284
2314
  {
2285
2315
  block_dup_check($4, $5);
2286
- $5->nd_iter = NEW_CALL($1, $3, $4);
2316
+ $5->nd_iter = NEW_QCALL($2, $1, $3, $4);
2287
2317
  $$ = $5;
2288
2318
  fixpos($$, $1);
2289
2319
  }
2290
- | block_call dot_or_colon operation2 command_args do_block
2320
+ | block_call call_op2 operation2 command_args do_block
2291
2321
  {
2292
2322
  block_dup_check($4, $5);
2293
- $5->nd_iter = NEW_CALL($1, $3, $4);
2323
+ $5->nd_iter = NEW_QCALL($2, $1, $3, $4);
2294
2324
  $$ = $5;
2295
2325
  fixpos($$, $1);
2296
2326
  }
@@ -2302,13 +2332,13 @@ method_call : fcall paren_args
2302
2332
  $$->nd_args = $2;
2303
2333
  fixpos($$, $2);
2304
2334
  }
2305
- | primary_value '.' operation2
2335
+ | primary_value call_op operation2
2306
2336
  {
2307
2337
  $<num>$ = sourceline;
2308
2338
  }
2309
2339
  opt_paren_args
2310
2340
  {
2311
- $$ = NEW_CALL($1, $3, $5);
2341
+ $$ = NEW_QCALL($2, $1, $3, $5);
2312
2342
  nd_set_line($$, $<num>4);
2313
2343
  }
2314
2344
  | primary_value tCOLON2 operation2
@@ -2324,13 +2354,13 @@ method_call : fcall paren_args
2324
2354
  {
2325
2355
  $$ = NEW_CALL($1, $3, 0);
2326
2356
  }
2327
- | primary_value '.'
2357
+ | primary_value call_op
2328
2358
  {
2329
2359
  $<num>$ = sourceline;
2330
2360
  }
2331
2361
  paren_args
2332
2362
  {
2333
- $$ = NEW_CALL($1, parser_intern("call"), $4);
2363
+ $$ = NEW_QCALL($2, $1, parser_intern("call"), $4);
2334
2364
  nd_set_line($$, $<num>3);
2335
2365
  }
2336
2366
  | primary_value tCOLON2
@@ -2469,6 +2499,8 @@ string : tCHAR
2469
2499
 
2470
2500
  string1 : tSTRING_BEG string_contents tSTRING_END
2471
2501
  {
2502
+ heredoc_dedent($2);
2503
+ heredoc_indent = 0;
2472
2504
  $$ = $2;
2473
2505
  }
2474
2506
  ;
@@ -2476,6 +2508,10 @@ string1 : tSTRING_BEG string_contents tSTRING_END
2476
2508
  xstring : tXSTRING_BEG xstring_contents tSTRING_END
2477
2509
  {
2478
2510
  NODE *node = $2;
2511
+
2512
+ heredoc_dedent($2);
2513
+ heredoc_indent = 0;
2514
+
2479
2515
  if(!node) {
2480
2516
  node = NEW_XSTR(STR_NEW0());
2481
2517
  } else {
@@ -2700,7 +2736,7 @@ string_content : tSTRING_CONTENT
2700
2736
  {
2701
2737
  $<node>$ = lex_strterm;
2702
2738
  lex_strterm = 0;
2703
- lex_state = EXPR_BEG;
2739
+ SET_LEX_STATE(EXPR_BEG);
2704
2740
  }
2705
2741
  string_dvar
2706
2742
  {
@@ -2717,21 +2753,31 @@ string_content : tSTRING_CONTENT
2717
2753
  {
2718
2754
  $<node>$ = lex_strterm;
2719
2755
  lex_strterm = 0;
2720
- lex_state = EXPR_BEG;
2756
+ }
2757
+ {
2758
+ $<num>$ = lex_state;
2759
+ SET_LEX_STATE(EXPR_BEG);
2721
2760
  }
2722
2761
  {
2723
2762
  $<num>$ = brace_nest;
2724
2763
  brace_nest = 0;
2725
2764
  }
2765
+ {
2766
+ $<num>$ = heredoc_indent;
2767
+ heredoc_indent = 0;
2768
+ }
2726
2769
  compstmt tSTRING_DEND
2727
2770
  {
2728
2771
  cond_stack = $<val>1;
2729
2772
  cmdarg_stack = $<val>2;
2730
2773
  lex_strterm = $<node>3;
2731
- brace_nest = $<num>4;
2774
+ SET_LEX_STATE($<num>4);
2775
+ brace_nest = $<num>5;
2776
+ heredoc_indent = $<num>6;
2777
+ heredoc_line_indent = -1;
2732
2778
 
2733
- if($5) $5->flags &= ~NODE_FL_NEWLINE;
2734
- $$ = new_evstr($5);
2779
+ if($7) $7->flags &= ~NODE_FL_NEWLINE;
2780
+ $$ = new_evstr($7);
2735
2781
  }
2736
2782
  ;
2737
2783
 
@@ -2743,7 +2789,7 @@ string_dvar : tGVAR {$$ = NEW_GVAR($1);}
2743
2789
 
2744
2790
  symbol : tSYMBEG sym
2745
2791
  {
2746
- lex_state = EXPR_END;
2792
+ SET_LEX_STATE(EXPR_END);
2747
2793
  $$ = $2;
2748
2794
  }
2749
2795
  ;
@@ -2756,7 +2802,7 @@ sym : fname
2756
2802
 
2757
2803
  dsym : tSYMBEG xstring_contents tSTRING_END
2758
2804
  {
2759
- lex_state = EXPR_END;
2805
+ SET_LEX_STATE(EXPR_END);
2760
2806
  $$ = dsym_node($2);
2761
2807
  }
2762
2808
  ;
@@ -2818,20 +2864,16 @@ backref : tNTH_REF
2818
2864
  | tBACK_REF
2819
2865
  ;
2820
2866
 
2821
- superclass : term
2867
+ superclass : '<'
2822
2868
  {
2823
- $$ = 0;
2824
- }
2825
- | '<'
2826
- {
2827
- lex_state = EXPR_BEG;
2869
+ SET_LEX_STATE(EXPR_BEG);
2828
2870
  command_start = TRUE;
2829
2871
  }
2830
2872
  expr_value term
2831
2873
  {
2832
2874
  $$ = $3;
2833
2875
  }
2834
- | error term
2876
+ | /* none */
2835
2877
  {
2836
2878
  yyerrok;
2837
2879
  $$ = 0;
@@ -2841,13 +2883,19 @@ superclass : term
2841
2883
  f_arglist : '(' f_args rparen
2842
2884
  {
2843
2885
  $$ = $2;
2844
- lex_state = EXPR_BEG;
2886
+ SET_LEX_STATE(EXPR_BEG);
2845
2887
  command_start = TRUE;
2846
2888
  }
2847
- | f_args term
2889
+ | {
2890
+ $<num>$ = in_kwarg;
2891
+ in_kwarg = 1;
2892
+ SET_LEX_STATE(lex_state | EXPR_LABEL); /* force for args */
2893
+ }
2894
+ f_args term
2848
2895
  {
2849
- $$ = $1;
2850
- lex_state = EXPR_BEG;
2896
+ in_kwarg = !!$<num>1;
2897
+ $$ = $2;
2898
+ SET_LEX_STATE(EXPR_BEG);
2851
2899
  command_start = TRUE;
2852
2900
  }
2853
2901
  ;
@@ -2973,9 +3021,18 @@ f_norm_arg : f_bad_arg
2973
3021
  }
2974
3022
  ;
2975
3023
 
2976
- f_arg_item : f_norm_arg
3024
+ f_arg_asgn : f_norm_arg
3025
+ {
3026
+ ID id = get_id($1);
3027
+ arg_var(id);
3028
+ current_arg = id;
3029
+ $$ = $1;
3030
+ }
3031
+ ;
3032
+
3033
+ f_arg_item : f_arg_asgn
2977
3034
  {
2978
- arg_var(get_id($1));
3035
+ current_arg = 0;
2979
3036
  $$ = NEW_ARGS_AUX($1, 1);
2980
3037
  }
2981
3038
  | tLPAREN f_margs rparen
@@ -2999,18 +3056,22 @@ f_arg : f_arg_item
2999
3056
 
3000
3057
  f_label : tLABEL
3001
3058
  {
3002
- arg_var(formal_argument(get_id($1)));
3059
+ ID id = get_id($1);
3060
+ arg_var(formal_argument(id));
3061
+ current_arg = id;
3003
3062
  $$ = $1;
3004
3063
  }
3005
3064
  ;
3006
3065
 
3007
3066
  f_kw : f_label arg_value
3008
3067
  {
3068
+ current_arg = 0;
3009
3069
  $$ = assignable($1, $2);
3010
3070
  $$ = NEW_KW_ARG(0, $$);
3011
3071
  }
3012
3072
  | f_label
3013
3073
  {
3074
+ current_arg = 0;
3014
3075
  $$ = assignable($1, NEW_REQ_KW);
3015
3076
  $$ = NEW_KW_ARG(0, $$);
3016
3077
  }
@@ -3035,7 +3096,7 @@ f_block_kwarg : f_block_kw
3035
3096
  | f_block_kwarg ',' f_block_kw
3036
3097
  {
3037
3098
  NODE *kws = $1;
3038
- while (kws->nd_next) {
3099
+ while(kws->nd_next) {
3039
3100
  kws = kws->nd_next;
3040
3101
  }
3041
3102
  kws->nd_next = $3;
@@ -3050,7 +3111,7 @@ f_kwarg : f_kw
3050
3111
  | f_kwarg ',' f_kw
3051
3112
  {
3052
3113
  NODE *kws = $1;
3053
- while (kws->nd_next) {
3114
+ while(kws->nd_next) {
3054
3115
  kws = kws->nd_next;
3055
3116
  }
3056
3117
  kws->nd_next = $3;
@@ -3070,20 +3131,21 @@ f_kwrest : kwrest_mark tIDENTIFIER
3070
3131
  | kwrest_mark
3071
3132
  {
3072
3133
  $$ = internal_id();
3134
+ arg_var($$);
3073
3135
  }
3074
3136
  ;
3075
3137
 
3076
- f_opt : f_norm_arg '=' arg_value
3138
+ f_opt : f_arg_asgn '=' arg_value
3077
3139
  {
3078
- arg_var(get_id($1));
3140
+ current_arg = 0;
3079
3141
  $$ = assignable($1, $3);
3080
3142
  $$ = NEW_OPT_ARG(0, $$);
3081
3143
  }
3082
3144
  ;
3083
3145
 
3084
- f_block_opt : f_norm_arg '=' primary_value
3146
+ f_block_opt : f_arg_asgn '=' primary_value
3085
3147
  {
3086
- arg_var(get_id($1));
3148
+ current_arg = 0;
3087
3149
  $$ = assignable($1, $3);
3088
3150
  $$ = NEW_OPT_ARG(0, $$);
3089
3151
  }
@@ -3169,7 +3231,7 @@ singleton : var_ref
3169
3231
  $$ = $1;
3170
3232
  if(!$$) $$ = NEW_NIL();
3171
3233
  }
3172
- | '(' {lex_state = EXPR_BEG;} expr rparen
3234
+ | '(' {SET_LEX_STATE(EXPR_BEG);} expr rparen
3173
3235
  {
3174
3236
  if($3 == 0) {
3175
3237
  yy_error("can't define singleton method for ().");
@@ -3245,6 +3307,23 @@ dot_or_colon : '.'
3245
3307
  | tCOLON2
3246
3308
  ;
3247
3309
 
3310
+ call_op : '.'
3311
+ {
3312
+ $$ = '.';
3313
+ }
3314
+ | tANDDOT
3315
+ {
3316
+ $$ = tANDDOT;
3317
+ }
3318
+ ;
3319
+
3320
+ call_op2 : call_op
3321
+ | tCOLON2
3322
+ {
3323
+ $$ = tCOLON2;
3324
+ }
3325
+ ;
3326
+
3248
3327
  opt_terms : /* none */
3249
3328
  | terms
3250
3329
  ;
@@ -3341,6 +3420,28 @@ static int parser_here_document(rb_parser_state*, NODE*);
3341
3420
 
3342
3421
  #define parser_isascii() ISASCII(*(lex_p-1))
3343
3422
 
3423
+ static int token_info_get_column(rb_parser_state* parser_state, const char *pend) {
3424
+ int col = 1;
3425
+ const char *p;
3426
+ for(p = lex_pbeg; p < pend; p++) {
3427
+ if(*p == '\t') {
3428
+ col = (((col - 1) / TAB_WIDTH) + 1) * TAB_WIDTH;
3429
+ }
3430
+ col++;
3431
+ }
3432
+ return col;
3433
+ }
3434
+
3435
+ static int token_info_has_nonspaces(rb_parser_state* parser_state, const char *pend) {
3436
+ const char *p;
3437
+ for(p = lex_pbeg; p < pend; p++) {
3438
+ if(*p != ' ' && *p != '\t') {
3439
+ return 1;
3440
+ }
3441
+ }
3442
+ return 0;
3443
+ }
3444
+
3344
3445
  static void parser_token_info_push(rb_parser_state* parser_state, const char *token) {
3345
3446
  /* TODO */
3346
3447
  }
@@ -3395,21 +3496,21 @@ must_be_ascii_compatible(VALUE s)
3395
3496
  static VALUE
3396
3497
  lex_get_str(rb_parser_state* parser_state, VALUE s)
3397
3498
  {
3398
- const char *beg, *end, *pend;
3399
- rb_encoding* enc = must_be_ascii_compatible(s);
3499
+ char *beg, *end, *start;
3500
+ long len;
3400
3501
 
3401
3502
  beg = RSTRING_PTR(s);
3503
+ len = RSTRING_LEN(s);
3504
+ start = beg;
3402
3505
  if(lex_gets_ptr) {
3403
- if(RSTRING_LEN(s) == lex_gets_ptr) return Qnil;
3506
+ if(len == lex_gets_ptr) return Qnil;
3404
3507
  beg += lex_gets_ptr;
3508
+ len -= lex_gets_ptr;
3405
3509
  }
3406
- pend = RSTRING_PTR(s) + RSTRING_LEN(s);
3407
- end = beg;
3408
- while(end < pend) {
3409
- if(*end++ == '\n') break;
3410
- }
3411
- lex_gets_ptr = end - RSTRING_PTR(s);
3412
- return REF(parser_enc_str_new(beg, end - beg, enc));
3510
+ end = (char*)memchr(beg, '\n', len);
3511
+ if(end) len = ++end - beg;
3512
+ lex_gets_ptr += len;
3513
+ return REF(rb_str_subseq(s, beg - start, len));
3413
3514
  }
3414
3515
 
3415
3516
  static VALUE
@@ -3573,10 +3674,10 @@ parser_str_new(rb_parser_state* parser_state, const char *p, long n,
3573
3674
  #define lex_eol_p() (lex_p >= lex_pend)
3574
3675
  #define peek(c) (lex_p < lex_pend && (c) == *lex_p)
3575
3676
  #define peek_n(c,n) (lex_p+(n) < lex_pend && (c) == (unsigned char)lex_p[n])
3677
+ #define peekc() peekc_n(0)
3678
+ #define peekc_n(n) (lex_p+(n) < lex_pend ? (unsigned char)lex_p[n] : -1)
3576
3679
 
3577
- static inline int
3578
- parser_nextc(rb_parser_state* parser_state)
3579
- {
3680
+ static inline int parser_nextc(rb_parser_state* parser_state) {
3580
3681
  int c;
3581
3682
 
3582
3683
  if(lex_p == lex_pend) {
@@ -3614,11 +3715,12 @@ parser_nextc(rb_parser_state* parser_state)
3614
3715
  return c;
3615
3716
  }
3616
3717
 
3617
- static void
3618
- parser_pushback(rb_parser_state* parser_state, int c)
3619
- {
3718
+ static void parser_pushback(rb_parser_state* parser_state, int c) {
3620
3719
  if(c == -1) return;
3621
3720
  lex_p--;
3721
+ if(lex_p > lex_pbeg && lex_p[0] == '\n' && lex_p[-1] == '\r') {
3722
+ lex_p--;
3723
+ }
3622
3724
  }
3623
3725
 
3624
3726
  /* Indicates if we're currently at the beginning of a line. */
@@ -3632,9 +3734,7 @@ parser_pushback(rb_parser_state* parser_state, int c)
3632
3734
  #define toklen() tokidx
3633
3735
  #define toklast() (tokidx>0?tokenbuf[tokidx-1]:0)
3634
3736
 
3635
- static char*
3636
- parser_newtok(rb_parser_state* parser_state)
3637
- {
3737
+ static char* parser_newtok(rb_parser_state* parser_state) {
3638
3738
  tokidx = 0;
3639
3739
  tokline = sourceline;
3640
3740
  if(!tokenbuf) {
@@ -3648,9 +3748,7 @@ parser_newtok(rb_parser_state* parser_state)
3648
3748
  return tokenbuf;
3649
3749
  }
3650
3750
 
3651
- static char *
3652
- parser_tokspace(rb_parser_state *parser_state, int n)
3653
- {
3751
+ static char * parser_tokspace(rb_parser_state* parser_state, int n) {
3654
3752
  tokidx += n;
3655
3753
 
3656
3754
  if(tokidx >= toksiz) {
@@ -3663,8 +3761,7 @@ parser_tokspace(rb_parser_state *parser_state, int n)
3663
3761
  }
3664
3762
 
3665
3763
 
3666
- static void parser_tokadd(rb_parser_state* parser_state, char c)
3667
- {
3764
+ static void parser_tokadd(rb_parser_state* parser_state, char c) {
3668
3765
  assert(tokidx < toksiz && tokidx >= 0);
3669
3766
  tokenbuf[tokidx++] = c;
3670
3767
  if(tokidx >= toksiz) {
@@ -3673,9 +3770,7 @@ static void parser_tokadd(rb_parser_state* parser_state, char c)
3673
3770
  }
3674
3771
  }
3675
3772
 
3676
- static int
3677
- parser_tok_hex(rb_parser_state *parser_state, size_t *numlen)
3678
- {
3773
+ static int parser_tok_hex(rb_parser_state* parser_state, size_t *numlen) {
3679
3774
  int c;
3680
3775
 
3681
3776
  c = scan_hex(lex_p, 2, numlen);
@@ -3690,7 +3785,7 @@ parser_tok_hex(rb_parser_state *parser_state, size_t *numlen)
3690
3785
  #define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n))
3691
3786
 
3692
3787
  static int
3693
- parser_tokadd_utf8(rb_parser_state *parser_state, rb_encoding **encp,
3788
+ parser_tokadd_utf8(rb_parser_state* parser_state, rb_encoding** encp,
3694
3789
  int string_literal, int symbol_literal, int regexp_literal)
3695
3790
  {
3696
3791
  /*
@@ -3740,7 +3835,8 @@ parser_tokadd_utf8(rb_parser_state *parser_state, rb_encoding **encp,
3740
3835
 
3741
3836
  if(regexp_literal) tokadd('}');
3742
3837
  nextc();
3743
- } else { /* handle \uxxxx form */
3838
+ } else {
3839
+ /* handle \uxxxx form */
3744
3840
  codepoint = scan_hex(lex_p, 4, &numlen);
3745
3841
  if(numlen < 4) {
3746
3842
  yy_error("invalid Unicode escape");
@@ -3763,55 +3859,46 @@ parser_tokadd_utf8(rb_parser_state *parser_state, rb_encoding **encp,
3763
3859
  #define ESCAPE_CONTROL 1
3764
3860
  #define ESCAPE_META 2
3765
3861
 
3766
- static int
3767
- parser_read_escape(rb_parser_state *parser_state, int flags, rb_encoding **encp)
3862
+ static int parser_read_escape(rb_parser_state* parser_state,
3863
+ int flags, rb_encoding **encp)
3768
3864
  {
3769
3865
  int c;
3770
3866
  size_t numlen;
3771
3867
 
3772
3868
  switch(c = nextc()) {
3773
- case '\\': /* Backslash */
3869
+ case '\\': /* Backslash */
3774
3870
  return c;
3775
-
3776
- case 'n': /* newline */
3871
+ case 'n': /* newline */
3777
3872
  return '\n';
3778
-
3779
- case 't': /* horizontal tab */
3873
+ case 't': /* horizontal tab */
3780
3874
  return '\t';
3781
-
3782
- case 'r': /* carriage-return */
3875
+ case 'r': /* carriage-return */
3783
3876
  return '\r';
3784
-
3785
- case 'f': /* form-feed */
3877
+ case 'f': /* form-feed */
3786
3878
  return '\f';
3787
-
3788
- case 'v': /* vertical tab */
3879
+ case 'v': /* vertical tab */
3789
3880
  return '\13';
3790
-
3791
- case 'a': /* alarm(bell) */
3881
+ case 'a': /* alarm(bell) */
3792
3882
  return '\007';
3793
-
3794
- case 'e': /* escape */
3883
+ case 'e': /* escape */
3795
3884
  return 033;
3796
3885
 
3797
3886
  case '0': case '1': case '2': case '3': /* octal constant */
3798
3887
  case '4': case '5': case '6': case '7':
3799
- if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
3800
3888
  pushback(c);
3801
3889
  c = scan_oct(lex_p, 3, &numlen);
3802
3890
  lex_p += numlen;
3803
3891
  return c;
3804
3892
 
3805
- case 'x': /* hex constant */
3806
- if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
3893
+ case 'x': /* hex constant */
3807
3894
  c = tok_hex(&numlen);
3808
3895
  if(numlen == 0) return 0;
3809
3896
  return c;
3810
3897
 
3811
- case 'b': /* backspace */
3898
+ case 'b': /* backspace */
3812
3899
  return '\010';
3813
3900
 
3814
- case 's': /* space */
3901
+ case 's': /* space */
3815
3902
  return ' ';
3816
3903
 
3817
3904
  case 'M':
@@ -3856,16 +3943,12 @@ parser_read_escape(rb_parser_state *parser_state, int flags, rb_encoding **encp)
3856
3943
  }
3857
3944
  }
3858
3945
 
3859
- static void
3860
- parser_tokaddmbc(rb_parser_state* parser_state, int c, rb_encoding *enc)
3861
- {
3946
+ static void parser_tokaddmbc(rb_parser_state* parser_state, int c, rb_encoding *enc) {
3862
3947
  int len = parser_enc_codelen(c, enc);
3863
3948
  parser_enc_mbcput(c, tokspace(len), enc);
3864
3949
  }
3865
3950
 
3866
- static int
3867
- parser_tokadd_escape(rb_parser_state* parser_state, rb_encoding **encp)
3868
- {
3951
+ static int parser_tokadd_escape(rb_parser_state* parser_state, rb_encoding **encp) {
3869
3952
  int c;
3870
3953
  int flags = 0;
3871
3954
  size_t numlen;
@@ -3873,11 +3956,10 @@ parser_tokadd_escape(rb_parser_state* parser_state, rb_encoding **encp)
3873
3956
  first:
3874
3957
  switch(c = nextc()) {
3875
3958
  case '\n':
3876
- return 0; /* just ignore */
3959
+ return 0; /* just ignore */
3877
3960
 
3878
3961
  case '0': case '1': case '2': case '3': /* octal constant */
3879
3962
  case '4': case '5': case '6': case '7':
3880
- if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
3881
3963
  {
3882
3964
  scan_oct(--lex_p, 3, &numlen);
3883
3965
  if(numlen == 0) goto eof;
@@ -3886,8 +3968,7 @@ first:
3886
3968
  }
3887
3969
  return 0;
3888
3970
 
3889
- case 'x': /* hex constant */
3890
- if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
3971
+ case 'x': /* hex constant */
3891
3972
  {
3892
3973
  tok_hex(&numlen);
3893
3974
  if(numlen == 0) goto eof;
@@ -3938,9 +4019,7 @@ eof:
3938
4019
  return 0;
3939
4020
  }
3940
4021
 
3941
- static int
3942
- parser_regx_options(rb_parser_state* parser_state)
3943
- {
4022
+ static int parser_regx_options(rb_parser_state* parser_state) {
3944
4023
  int kcode = 0;
3945
4024
  int options = 0;
3946
4025
  int c;
@@ -3993,9 +4072,7 @@ parser_regx_options(rb_parser_state* parser_state)
3993
4072
  return options | kcode;
3994
4073
  }
3995
4074
 
3996
- static int
3997
- parser_tokadd_mbchar(rb_parser_state *parser_state, int c)
3998
- {
4075
+ static int parser_tokadd_mbchar(rb_parser_state* parser_state, int c) {
3999
4076
  int len = parser_precise_mbclen();
4000
4077
  if(!MBCLEN_CHARFOUND_P(len)) {
4001
4078
  rb_compile_error(parser_state, "invalid multibyte char (%s)", parser_encoding_name());
@@ -4009,9 +4086,42 @@ parser_tokadd_mbchar(rb_parser_state *parser_state, int c)
4009
4086
 
4010
4087
  #define tokadd_mbchar(c) parser_tokadd_mbchar(parser_state, c)
4011
4088
 
4089
+ static inline int simple_re_meta(int c) {
4090
+ switch(c) {
4091
+ case '$': case '*': case '+': case '.':
4092
+ case '?': case '^': case '|':
4093
+ case ')': case ']': case '}': case '>':
4094
+ return TRUE;
4095
+ default:
4096
+ return FALSE;
4097
+ }
4098
+ }
4099
+
4100
+ static int parser_update_heredoc_indent(rb_parser_state* parser_state, int c) {
4101
+ if(heredoc_line_indent == -1) {
4102
+ if(c == '\n') heredoc_line_indent = 0;
4103
+ } else {
4104
+ if(c == ' ') {
4105
+ heredoc_line_indent++;
4106
+ return TRUE;
4107
+ } else if(c == '\t') {
4108
+ int w = (heredoc_line_indent / TAB_WIDTH) + 1;
4109
+ heredoc_line_indent = w * TAB_WIDTH;
4110
+ return TRUE;
4111
+ } else if(c != '\n') {
4112
+ if(heredoc_indent > heredoc_line_indent) {
4113
+ heredoc_indent = heredoc_line_indent;
4114
+ }
4115
+ heredoc_line_indent = -1;
4116
+ }
4117
+ }
4118
+ return FALSE;
4119
+ }
4120
+
4012
4121
  static int
4013
- parser_tokadd_string(rb_parser_state *parser_state,
4014
- int func, int term, int paren, long *nest, rb_encoding **encp)
4122
+ parser_tokadd_string(rb_parser_state* parser_state,
4123
+ int func, int term, int paren, long *nest,
4124
+ rb_encoding **encp)
4015
4125
  {
4016
4126
  int c;
4017
4127
  int has_nonascii = 0;
@@ -4019,111 +4129,123 @@ parser_tokadd_string(rb_parser_state *parser_state,
4019
4129
  char *errbuf = 0;
4020
4130
  static const char mixed_msg[] = "%s mixed within %s source";
4021
4131
 
4022
- #define mixed_error(enc1, enc2) if(!errbuf) { \
4023
- size_t len = sizeof(mixed_msg) - 4; \
4024
- len += strlen(parser_enc_name(enc1)); \
4025
- len += strlen(parser_enc_name(enc2)); \
4026
- errbuf = ALLOCA_N(char, len); \
4027
- snprintf(errbuf, len, mixed_msg, parser_enc_name(enc1), parser_enc_name(enc2)); \
4028
- yy_error(errbuf); \
4029
- }
4030
-
4031
- #define mixed_escape(beg, enc1, enc2) do { \
4032
- const char *pos = lex_p; \
4033
- lex_p = beg; \
4034
- mixed_error(enc1, enc2); \
4035
- lex_p = pos; \
4036
- } while(0)
4037
-
4038
- while((c = nextc()) != -1) {
4039
- if(paren && c == paren) {
4040
- ++*nest;
4041
- } else if(c == term) {
4042
- if(!nest || !*nest) {
4043
- pushback(c);
4044
- break;
4045
- }
4046
- --*nest;
4047
- } else if((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) {
4048
- int c2 = *lex_p;
4049
- if(c2 == '$' || c2 == '@' || c2 == '{') {
4050
- pushback(c);
4051
- break;
4132
+ #define mixed_error(enc1, enc2) if(!errbuf) { \
4133
+ size_t len = sizeof(mixed_msg) - 4; \
4134
+ len += strlen(rb_enc_name(enc1)); \
4135
+ len += strlen(rb_enc_name(enc2)); \
4136
+ errbuf = ALLOCA_N(char, len); \
4137
+ snprintf(errbuf, len, mixed_msg, \
4138
+ rb_enc_name(enc1), \
4139
+ rb_enc_name(enc2)); \
4140
+ yy_error(errbuf); \
4141
+ }
4142
+ #define mixed_escape(beg, enc1, enc2) do { \
4143
+ const char *pos = lex_p; \
4144
+ lex_p = (beg); \
4145
+ mixed_error((enc1), (enc2)); \
4146
+ lex_p = pos; \
4147
+ } while(0)
4148
+
4149
+ while((c = nextc()) != -1) {
4150
+ if(heredoc_indent > 0) {
4151
+ parser_update_heredoc_indent(parser_state, c);
4052
4152
  }
4053
- } else if(c == '\\') {
4054
- const char *beg = lex_p - 1;
4055
- c = nextc();
4056
- switch(c) {
4057
- case '\n':
4058
- if(func & STR_FUNC_QWORDS) break;
4059
- if(func & STR_FUNC_EXPAND) continue;
4060
- tokadd('\\');
4061
- break;
4062
-
4063
- case '\\':
4064
- if(func & STR_FUNC_ESCAPE) tokadd(c);
4065
- break;
4066
-
4067
- case 'u':
4068
- if((func & STR_FUNC_EXPAND) == 0) {
4069
- tokadd('\\');
4153
+ if(paren && c == paren) {
4154
+ ++*nest;
4155
+ } else if(c == term) {
4156
+ if(!nest || !*nest) {
4157
+ pushback(c);
4070
4158
  break;
4071
4159
  }
4072
- parser_tokadd_utf8(parser_state, &enc, 1, func & STR_FUNC_SYMBOL,
4073
- func & STR_FUNC_REGEXP);
4074
- if(has_nonascii && enc != *encp) {
4075
- mixed_escape(beg, enc, *encp);
4160
+ --*nest;
4161
+ } else if((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) {
4162
+ int c2 = *lex_p;
4163
+ if(c2 == '$' || c2 == '@' || c2 == '{') {
4164
+ pushback(c);
4165
+ break;
4076
4166
  }
4077
- continue;
4167
+ } else if(c == '\\') {
4168
+ const char *beg = lex_p - 1;
4169
+ c = nextc();
4170
+ switch (c) {
4171
+ case '\n':
4172
+ if(func & STR_FUNC_QWORDS) break;
4173
+ if(func & STR_FUNC_EXPAND) continue;
4174
+ tokadd('\\');
4175
+ break;
4078
4176
 
4079
- default:
4080
- if(func & STR_FUNC_REGEXP) {
4081
- pushback(c);
4082
- if((c = tokadd_escape(&enc)) < 0)
4083
- return -1;
4177
+ case '\\':
4178
+ if(func & STR_FUNC_ESCAPE) tokadd(c);
4179
+ break;
4180
+
4181
+ case 'u':
4182
+ if((func & STR_FUNC_EXPAND) == 0) {
4183
+ tokadd('\\');
4184
+ break;
4185
+ }
4186
+ parser_tokadd_utf8(parser_state, &enc, 1,
4187
+ func & STR_FUNC_SYMBOL,
4188
+ func & STR_FUNC_REGEXP);
4084
4189
  if(has_nonascii && enc != *encp) {
4085
4190
  mixed_escape(beg, enc, *encp);
4086
4191
  }
4087
4192
  continue;
4088
- } else if(func & STR_FUNC_EXPAND) {
4089
- pushback(c);
4090
- if(func & STR_FUNC_ESCAPE) tokadd('\\');
4091
- c = read_escape(0, &enc);
4193
+
4194
+ default:
4195
+ if(c == -1) return -1;
4092
4196
  if(!ISASCII(c)) {
4093
- if(tokadd_mbchar(c) == -1) return -1;
4197
+ if((func & STR_FUNC_EXPAND) == 0) tokadd('\\');
4198
+ goto non_ascii;
4199
+ }
4200
+ if(func & STR_FUNC_REGEXP) {
4201
+ if(c == term && !simple_re_meta(c)) {
4202
+ tokadd(c);
4203
+ continue;
4204
+ }
4205
+ pushback(c);
4206
+ if((c = tokadd_escape(&enc)) < 0) {
4207
+ return -1;
4208
+ }
4209
+ if(has_nonascii && enc != *encp) {
4210
+ mixed_escape(beg, enc, *encp);
4211
+ }
4212
+ continue;
4213
+ } else if(func & STR_FUNC_EXPAND) {
4214
+ pushback(c);
4215
+ if(func & STR_FUNC_ESCAPE) tokadd('\\');
4216
+ c = read_escape(0, &enc);
4217
+ } else if((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
4218
+ /* ignore backslashed spaces in %w */
4219
+ } else if(c != term && !(paren && c == paren)) {
4220
+ tokadd('\\');
4221
+ pushback(c);
4094
4222
  continue;
4095
4223
  }
4096
- } else if((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
4097
- /* ignore backslashed spaces in %w */
4098
- } else if(c != term && !(paren && c == paren)) {
4099
- tokadd('\\');
4100
- pushback(c);
4224
+ }
4225
+ } else if(!parser_isascii()) {
4226
+ non_ascii:
4227
+ has_nonascii = 1;
4228
+ if(enc != *encp) {
4229
+ mixed_error(enc, *encp);
4101
4230
  continue;
4102
4231
  }
4103
- }
4104
- } else if(!parser_isascii()) {
4105
- has_nonascii = 1;
4106
- if(enc != *encp) {
4107
- mixed_error(enc, *encp);
4232
+ if(tokadd_mbchar(c) == -1) return -1;
4108
4233
  continue;
4234
+ } else if((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
4235
+ pushback(c);
4236
+ break;
4109
4237
  }
4110
- if(tokadd_mbchar(c) == -1) return -1;
4111
- continue;
4112
- } else if((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
4113
- pushback(c);
4114
- break;
4115
- }
4116
- if(c & 0x80) {
4117
- has_nonascii = 1;
4118
- if(enc != *encp) {
4119
- mixed_error(enc, *encp);
4120
- continue;
4238
+ if(c & 0x80) {
4239
+ has_nonascii = 1;
4240
+ if(enc != *encp) {
4241
+ mixed_error(enc, *encp);
4242
+ continue;
4243
+ }
4121
4244
  }
4245
+ tokadd(c);
4122
4246
  }
4123
- tokadd(c);
4124
- }
4125
- *encp = enc;
4126
- return c;
4247
+ *encp = enc;
4248
+ return c;
4127
4249
  }
4128
4250
 
4129
4251
  #define NEW_STRTERM(func, term, paren) \
@@ -4133,12 +4255,12 @@ parser_tokadd_string(rb_parser_state *parser_state,
4133
4255
 
4134
4256
  #define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
4135
4257
  #define SPECIAL_PUNCT(idx) ( \
4136
- BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
4137
- BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
4138
- BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
4139
- BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
4140
- BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
4141
- BIT('0', idx))
4258
+ BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
4259
+ BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
4260
+ BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
4261
+ BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
4262
+ BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
4263
+ BIT('0', idx))
4142
4264
  const unsigned int ruby_global_name_punct_bits[] = {
4143
4265
  SPECIAL_PUNCT(0),
4144
4266
  SPECIAL_PUNCT(1),
@@ -4147,16 +4269,12 @@ const unsigned int ruby_global_name_punct_bits[] = {
4147
4269
  #undef BIT
4148
4270
  #undef SPECIAL_PUNCT
4149
4271
 
4150
- static inline int
4151
- is_global_name_punct(const int c)
4152
- {
4272
+ static inline int is_global_name_punct(const int c) {
4153
4273
  if(c <= 0x20 || 0x7e < c) return 0;
4154
4274
  return (ruby_global_name_punct_bits[(c - 0x20) / 32] >> (c % 32)) & 1;
4155
4275
  }
4156
4276
 
4157
- static int
4158
- parser_peek_variable_name(rb_parser_state* parser_state)
4159
- {
4277
+ static int parser_peek_variable_name(rb_parser_state* parser_state) {
4160
4278
  int c;
4161
4279
  const char *p = lex_p;
4162
4280
 
@@ -4165,7 +4283,7 @@ parser_peek_variable_name(rb_parser_state* parser_state)
4165
4283
  switch(c) {
4166
4284
  case '$':
4167
4285
  if((c = *p) == '-') {
4168
- if (++p >= lex_pend) return 0;
4286
+ if(++p >= lex_pend) return 0;
4169
4287
  c = *p;
4170
4288
  } else if(is_global_name_punct(c) || ISDIGIT(c)) {
4171
4289
  return tSTRING_DVAR;
@@ -4189,9 +4307,7 @@ parser_peek_variable_name(rb_parser_state* parser_state)
4189
4307
  return 0;
4190
4308
  }
4191
4309
 
4192
- static int
4193
- parser_parse_string(rb_parser_state* parser_state, NODE *quote)
4194
- {
4310
+ static int parser_parse_string(rb_parser_state* parser_state, NODE *quote) {
4195
4311
  int func = (int)quote->nd_func;
4196
4312
  int term = nd_term(quote);
4197
4313
  int paren = nd_paren(quote);
@@ -4248,15 +4364,18 @@ parser_parse_string(rb_parser_state* parser_state, NODE *quote)
4248
4364
 
4249
4365
  /* Called when the lexer detects a heredoc is beginning. This pulls
4250
4366
  in more characters and detects what kind of heredoc it is. */
4251
- static int
4252
- parser_heredoc_identifier(rb_parser_state* parser_state)
4253
- {
4367
+ static int parser_heredoc_identifier(rb_parser_state* parser_state) {
4254
4368
  int c = nextc(), term, func = 0;
4255
4369
  size_t len;
4256
4370
 
4257
4371
  if(c == '-') {
4258
4372
  c = nextc();
4259
4373
  func = STR_FUNC_INDENT;
4374
+ } else if(c == '~') {
4375
+ c = nextc();
4376
+ func = STR_FUNC_INDENT;
4377
+ heredoc_indent = INT_MAX;
4378
+ heredoc_line_indent = 0;
4260
4379
  }
4261
4380
  switch(c) {
4262
4381
  case '\'':
@@ -4299,7 +4418,7 @@ parser_heredoc_identifier(rb_parser_state* parser_state)
4299
4418
  if(!parser_is_identchar()) {
4300
4419
  pushback(c);
4301
4420
  if(func & STR_FUNC_INDENT) {
4302
- pushback('-');
4421
+ pushback(heredoc_indent > 0 ? '~' : '-');
4303
4422
  }
4304
4423
  return 0;
4305
4424
  }
@@ -4332,9 +4451,7 @@ parser_heredoc_identifier(rb_parser_state* parser_state)
4332
4451
  return term == '`' ? tXSTRING_BEG : tSTRING_BEG;
4333
4452
  }
4334
4453
 
4335
- static void
4336
- parser_heredoc_restore(rb_parser_state* parser_state, NODE *here)
4337
- {
4454
+ static void parser_heredoc_restore(rb_parser_state* parser_state, NODE *here) {
4338
4455
  VALUE line;
4339
4456
 
4340
4457
  lex_strterm = 0;
@@ -4347,6 +4464,79 @@ parser_heredoc_restore(rb_parser_state* parser_state, NODE *here)
4347
4464
  sourceline = nd_line(here);
4348
4465
  }
4349
4466
 
4467
+ static int dedent_pos(const char *str, long len, int width) {
4468
+ int i, col = 0;
4469
+
4470
+ for(i = 0; i < len && col < width; i++) {
4471
+ if(str[i] == ' ') {
4472
+ col++;
4473
+ } else if(str[i] == '\t') {
4474
+ int n = TAB_WIDTH * (col / TAB_WIDTH + 1);
4475
+ if(n > width) break;
4476
+ col = n;
4477
+ } else {
4478
+ break;
4479
+ }
4480
+ }
4481
+ return i;
4482
+ }
4483
+
4484
+ static VALUE parser_heredoc_dedent_string(VALUE input, int width, int first) {
4485
+ long len;
4486
+ int col;
4487
+ char *str, *p, *out_p, *end, *t;
4488
+
4489
+ RSTRING_GETMEM(input, str, len);
4490
+ end = &str[len];
4491
+
4492
+ p = str;
4493
+ if(!first) {
4494
+ p = (char*)memchr(p, '\n', end - p);
4495
+ if(!p) return input;
4496
+ p++;
4497
+ }
4498
+ out_p = p;
4499
+ while(p < end) {
4500
+ col = dedent_pos(p, end - p, width);
4501
+ p += col;
4502
+ if(!(t = (char*)memchr(p, '\n', end - p)))
4503
+ t = end;
4504
+ else
4505
+ ++t;
4506
+ if(p > out_p) memmove(out_p, p, t - p);
4507
+ out_p += t - p;
4508
+ p = t;
4509
+ }
4510
+ rb_str_set_len(input, out_p - str);
4511
+
4512
+ return input;
4513
+ }
4514
+
4515
+ static void parser_heredoc_dedent(rb_parser_state* parser_state, NODE *root) {
4516
+ NODE *node, *str_node;
4517
+ int first = TRUE;
4518
+ int indent = heredoc_indent;
4519
+
4520
+ if(indent <= 0) return;
4521
+
4522
+ node = str_node = root;
4523
+
4524
+ while(str_node) {
4525
+ VALUE lit = str_node->nd_lit;
4526
+ if(NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
4527
+ rb_compile_error(parser_state, "dedent failure: %d: %ld", indent, lit);
4528
+ first = FALSE;
4529
+
4530
+ str_node = 0;
4531
+ while((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
4532
+ if((str_node = node->nd_head) != 0) {
4533
+ int type = nd_type(str_node);
4534
+ if(type == NODE_STR || type == NODE_DSTR) break;
4535
+ }
4536
+ }
4537
+ }
4538
+ }
4539
+
4350
4540
  static int
4351
4541
  parser_whole_match_p(rb_parser_state* parser_state, const char *eos, ssize_t len, int indent)
4352
4542
  {
@@ -4393,6 +4583,14 @@ parser_number_literal_suffix(rb_parser_state* parser_state, int mask)
4393
4583
  return 0;
4394
4584
  }
4395
4585
  pushback(c);
4586
+ if(c == '.') {
4587
+ c = peekc_n(1);
4588
+ if(ISDIGIT(c)) {
4589
+ yy_error("unexpected fraction part after numeric literal");
4590
+ lex_p += 2;
4591
+ while(parser_is_identchar()) nextc();
4592
+ }
4593
+ }
4396
4594
  break;
4397
4595
  }
4398
4596
 
@@ -4432,7 +4630,7 @@ static int
4432
4630
  parser_set_integer_literal(rb_parser_state* parser_state, VALUE v, int suffix)
4433
4631
  {
4434
4632
  int type = tINTEGER;
4435
- if (suffix & NUM_SUFFIX_R) {
4633
+ if(suffix & NUM_SUFFIX_R) {
4436
4634
  v = rb_funcall(rb_cObject, rb_intern("Rational"), 1, v);
4437
4635
  type = tRATIONAL;
4438
4636
  }
@@ -4443,9 +4641,7 @@ parser_set_integer_literal(rb_parser_state* parser_state, VALUE v, int suffix)
4443
4641
  is responsible for detecting an expandions (ie #{}) in the heredoc
4444
4642
  and emitting a lex token and also detecting the end of the heredoc. */
4445
4643
 
4446
- static int
4447
- parser_here_document(rb_parser_state* parser_state, NODE *here)
4448
- {
4644
+ static int parser_here_document(rb_parser_state* parser_state, NODE *here) {
4449
4645
  int c, func, indent = 0;
4450
4646
  const char *eos, *p, *pend;
4451
4647
  ssize_t len;
@@ -4479,6 +4675,7 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
4479
4675
  we find the identifier. */
4480
4676
 
4481
4677
  if((func & STR_FUNC_EXPAND) == 0) {
4678
+ int end = 0;
4482
4679
  do {
4483
4680
  p = RSTRING_PTR(lex_lastline);
4484
4681
  pend = lex_pend;
@@ -4493,6 +4690,15 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
4493
4690
  --pend;
4494
4691
  }
4495
4692
  }
4693
+
4694
+ if(heredoc_indent > 0) {
4695
+ long i = 0;
4696
+ while(p + i < pend && parser_update_heredoc_indent(parser_state, p[i])) {
4697
+ i++;
4698
+ }
4699
+ heredoc_line_indent = 0;
4700
+ }
4701
+
4496
4702
  if(str) {
4497
4703
  rb_str_cat(str, p, pend - p);
4498
4704
  } else {
@@ -4503,7 +4709,7 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
4503
4709
  if(nextc() == -1) {
4504
4710
  goto error;
4505
4711
  }
4506
- } while(!whole_match_p(eos, len, indent));
4712
+ } while(!(end = whole_match_p(eos, len, indent)));
4507
4713
  } else {
4508
4714
  newtok();
4509
4715
  if(c == '#') {
@@ -4543,17 +4749,14 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
4543
4749
 
4544
4750
  #include "lex.c.blt"
4545
4751
 
4546
- static int
4547
- arg_ambiguous()
4548
- {
4549
- rb_warning("ambiguous first argument; put parentheses or even spaces");
4752
+ static int parser_arg_ambiguous(rb_parser_state* parser_state, char c) {
4753
+ rb_warningS(
4754
+ "ambiguous first argument; put parentheses or a space even after `%c' operator", c);
4550
4755
 
4551
4756
  return 1;
4552
4757
  }
4553
4758
 
4554
- static ID
4555
- parser_formal_argument(rb_parser_state* parser_state, ID lhs)
4556
- {
4759
+ static ID parser_formal_argument(rb_parser_state* parser_state, ID lhs) {
4557
4760
  if(!is_local_id(lhs)) {
4558
4761
  yy_error("formal argument must be local variable");
4559
4762
  }
@@ -4561,14 +4764,11 @@ parser_formal_argument(rb_parser_state* parser_state, ID lhs)
4561
4764
  return lhs;
4562
4765
  }
4563
4766
 
4564
- static bool
4565
- parser_lvar_defined(rb_parser_state* parser_state, ID id) {
4767
+ static bool parser_lvar_defined(rb_parser_state* parser_state, ID id) {
4566
4768
  return (in_block() && bv_defined(id)) || local_id(id);
4567
4769
  }
4568
4770
 
4569
- static long
4570
- parser_encode_length(rb_parser_state* parser_state, const char *name, long len)
4571
- {
4771
+ static long parser_encode_length(rb_parser_state* parser_state, const char *name, long len) {
4572
4772
  long nlen;
4573
4773
 
4574
4774
  if(len > 5 && name[nlen = len - 5] == '-') {
@@ -4608,13 +4808,27 @@ parser_set_encode(rb_parser_state* parser_state, const char *name)
4608
4808
  parser_state->enc = enc;
4609
4809
  }
4610
4810
 
4811
+ static void
4812
+ parser_set_compile_option_flag(rb_parser_state* parser_state,
4813
+ const char *name, const char *val)
4814
+ {
4815
+ // TODO: 2.3
4816
+ }
4817
+
4818
+ static void
4819
+ parser_set_token_info(rb_parser_state* parser_state,
4820
+ const char *name, const char *val)
4821
+ {
4822
+ // TODO: 2.3
4823
+ }
4824
+
4611
4825
  static int
4612
4826
  comment_at_top(rb_parser_state* parser_state)
4613
4827
  {
4614
4828
  const char *p = lex_pbeg, *pend = lex_p - 1;
4615
4829
  if(line_count != (has_shebang ? 2 : 1)) return FALSE;
4616
4830
  while(p < pend) {
4617
- if (!ISSPACE(*p)) return FALSE;
4831
+ if(!ISSPACE(*p)) return FALSE;
4618
4832
  p++;
4619
4833
  }
4620
4834
  return TRUE;
@@ -4638,14 +4852,14 @@ struct magic_comment {
4638
4852
  rb_magic_comment_length_t length;
4639
4853
  };
4640
4854
 
4641
- static const struct magic_comment magic_comments[] = {
4642
- {"coding", magic_comment_encoding, parser_encode_length},
4643
- {"encoding", magic_comment_encoding, parser_encode_length},
4855
+ static const struct magic_comment magic_comments[4] = {
4856
+ {"coding", magic_comment_encoding, parser_encode_length},
4857
+ {"encoding", magic_comment_encoding, parser_encode_length},
4858
+ {"frozen_string_literal", parser_set_compile_option_flag},
4859
+ {"warn_indent", parser_set_token_info},
4644
4860
  };
4645
4861
 
4646
- static const char *
4647
- magic_comment_marker(const char *str, long len)
4648
- {
4862
+ static const char * magic_comment_marker(const char *str, long len) {
4649
4863
  long i = 2;
4650
4864
 
4651
4865
  while(i < len) {
@@ -4674,9 +4888,8 @@ magic_comment_marker(const char *str, long len)
4674
4888
  return 0;
4675
4889
  }
4676
4890
 
4677
- static int
4678
- parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
4679
- {
4891
+ static int parser_magic_comment(rb_parser_state* parser_state, const char *str, long len) {
4892
+ int indicator = 0;
4680
4893
  VALUE name = 0, val = 0;
4681
4894
  const char *beg, *end, *vbeg, *vend;
4682
4895
 
@@ -4686,10 +4899,15 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
4686
4899
  : (void)((_s) = REF(STR_NEW((_p), (_n)))))
4687
4900
 
4688
4901
  if(len <= 7) return FALSE;
4689
- if(!(beg = magic_comment_marker(str, len))) return FALSE;
4690
- if(!(end = magic_comment_marker(beg, str + len - beg))) return FALSE;
4691
- str = beg;
4692
- len = end - beg - 3;
4902
+
4903
+ if(!!(beg = magic_comment_marker(str, len))) {
4904
+ if(!(end = magic_comment_marker(beg, str + len - beg))) {
4905
+ return FALSE;
4906
+ }
4907
+ indicator = TRUE;
4908
+ str = beg;
4909
+ len = end - beg - 3;
4910
+ }
4693
4911
 
4694
4912
  /* %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*" */
4695
4913
  while(len > 0) {
@@ -4719,7 +4937,10 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
4719
4937
  // nothing
4720
4938
  }
4721
4939
  if(!len) break;
4722
- if(*str != ':') continue;
4940
+ if(*str != ':') {
4941
+ if(!indicator) return FALSE;
4942
+ continue;
4943
+ }
4723
4944
 
4724
4945
  do str++; while(--len > 0 && ISSPACE(*str));
4725
4946
  if(!len) break;
@@ -4743,7 +4964,12 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
4743
4964
  }
4744
4965
  vend = str;
4745
4966
  }
4746
- while(len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++;
4967
+ if(indicator) {
4968
+ while(len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++;
4969
+ } else {
4970
+ while(len > 0 && (ISSPACE(*str))) --len, str++;
4971
+ if(len) return FALSE;
4972
+ }
4747
4973
 
4748
4974
  n = end - beg;
4749
4975
  str_copy(name, beg, n);
@@ -4752,7 +4978,7 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
4752
4978
  if(s[i] == '-') s[i] = '_';
4753
4979
  }
4754
4980
  do {
4755
- if(strncasecmp(p->name, s, n) == 0) {
4981
+ if(strncasecmp(p->name, s, n) == 0 && !p->name[n]) {
4756
4982
  n = vend - vbeg;
4757
4983
  if(p->length) {
4758
4984
  n = (*p->length)(parser_state, vbeg, n);
@@ -4822,7 +5048,7 @@ parser_prepare(rb_parser_state* parser_state)
4822
5048
  case '#':
4823
5049
  if(peek('!')) has_shebang = 1;
4824
5050
  break;
4825
- case 0xef: /* UTF-8 BOM marker */
5051
+ case 0xef: /* UTF-8 BOM marker */
4826
5052
  if(lex_pend - lex_p >= 2 &&
4827
5053
  (unsigned char)lex_p[0] == 0xbb &&
4828
5054
  (unsigned char)lex_p[1] == 0xbf) {
@@ -4841,10 +5067,11 @@ parser_prepare(rb_parser_state* parser_state)
4841
5067
 
4842
5068
  #define IS_ARG() lex_state_p(EXPR_ARG_ANY)
4843
5069
  #define IS_END() lex_state_p(EXPR_END_ANY)
4844
- #define IS_BEG() lex_state_p(EXPR_BEG_ANY)
5070
+ #define IS_BEG() (lex_state_p(EXPR_BEG_ANY) \
5071
+ || lex_state_all_p(EXPR_ARG | EXPR_LABELED))
4845
5072
  #define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c))
4846
- #define IS_LABEL_POSSIBLE() ((lex_state_p(EXPR_BEG | EXPR_ENDFN) && !cmd_state) \
4847
- || IS_ARG())
5073
+ #define IS_LABEL_POSSIBLE() ((lex_state_p(EXPR_LABEL | EXPR_ENDFN) && !cmd_state) || \
5074
+ IS_ARG())
4848
5075
  #define IS_LABEL_SUFFIX(n) (peek_n(':',(n)) && !peek_n(':', (n)+1))
4849
5076
  #define IS_AFTER_OPERATOR() lex_state_p(EXPR_FNAME | EXPR_DOT)
4850
5077
 
@@ -4857,28 +5084,786 @@ parser_prepare(rb_parser_state* parser_state)
4857
5084
  space_seen && !ISSPACE(c) && \
4858
5085
  (ambiguous_operator(op, syn), 0)))
4859
5086
 
4860
- static int
4861
- parser_yylex(rb_parser_state *parser_state)
5087
+ static VALUE
5088
+ parse_rational(rb_parser_state* parser_state, char *str, int len, int seen_point)
4862
5089
  {
4863
- int c;
4864
- int space_seen = 0;
4865
- int cmd_state;
4866
- int label;
4867
- enum lex_state_e last_state;
4868
- rb_encoding *enc;
4869
- int mb;
5090
+ VALUE v;
5091
+ char *point = &str[seen_point];
5092
+ size_t fraclen = len-seen_point-1;
5093
+ memmove(point, point+1, fraclen+1);
5094
+ v = rb_cstr_to_inum(str, 10, FALSE);
5095
+ return rb_rational_new(v,
5096
+ rb_funcall(INT2FIX(10), rb_intern("**"), 1, INT2NUM(fraclen)));
5097
+ }
4870
5098
 
4871
- if(lex_strterm) {
4872
- int token;
4873
- if(nd_type(lex_strterm) == NODE_HEREDOC) {
4874
- token = here_document(lex_strterm);
4875
- if(token == tSTRING_END) {
4876
- lex_strterm = 0;
4877
- lex_state = EXPR_END;
4878
- }
4879
- } else {
4880
- token = parse_string(lex_strterm);
4881
- if(token == tSTRING_END && (lex_strterm->nd_func & STR_FUNC_LABEL)) {
5099
+ static int parse_numeric(rb_parser_state* parser_state, int c) {
5100
+ int is_float, seen_point, seen_e, nondigit;
5101
+ int suffix;
5102
+
5103
+ is_float = seen_point = seen_e = nondigit = 0;
5104
+ SET_LEX_STATE(EXPR_END);
5105
+ newtok();
5106
+ if(c == '-' || c == '+') {
5107
+ tokadd(c);
5108
+ c = nextc();
5109
+ }
5110
+ if(c == '0') {
5111
+ #define no_digits() do {yy_error("numeric literal without digits"); return 0;} while(0)
5112
+ int start = toklen();
5113
+ c = nextc();
5114
+ if(c == 'x' || c == 'X') {
5115
+ /* hexadecimal */
5116
+ c = nextc();
5117
+ if(c != -1 && ISXDIGIT(c)) {
5118
+ do {
5119
+ if(c == '_') {
5120
+ if(nondigit) break;
5121
+ nondigit = c;
5122
+ continue;
5123
+ }
5124
+ if(!ISXDIGIT(c)) break;
5125
+ nondigit = 0;
5126
+ tokadd(c);
5127
+ } while((c = nextc()) != -1);
5128
+ }
5129
+ pushback(c);
5130
+ tokfix();
5131
+ if(toklen() == start) {
5132
+ no_digits();
5133
+ } else if(nondigit) {
5134
+ goto trailing_uc;
5135
+ }
5136
+ suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5137
+ return set_integer_literal(rb_cstr_to_inum(tok(), 16, FALSE), suffix);
5138
+ }
5139
+ if(c == 'b' || c == 'B') {
5140
+ /* binary */
5141
+ c = nextc();
5142
+ if(c == '0' || c == '1') {
5143
+ do {
5144
+ if(c == '_') {
5145
+ if(nondigit) break;
5146
+ nondigit = c;
5147
+ continue;
5148
+ }
5149
+ if(c != '0' && c != '1') break;
5150
+ nondigit = 0;
5151
+ tokadd(c);
5152
+ } while((c = nextc()) != -1);
5153
+ }
5154
+ pushback(c);
5155
+ tokfix();
5156
+ if(toklen() == start) {
5157
+ no_digits();
5158
+ } else if(nondigit) {
5159
+ goto trailing_uc;
5160
+ }
5161
+ suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5162
+ return set_integer_literal(rb_cstr_to_inum(tok(), 2, FALSE), suffix);
5163
+ }
5164
+ if(c == 'd' || c == 'D') {
5165
+ /* decimal */
5166
+ c = nextc();
5167
+ if(c != -1 && ISDIGIT(c)) {
5168
+ do {
5169
+ if(c == '_') {
5170
+ if(nondigit) break;
5171
+ nondigit = c;
5172
+ continue;
5173
+ }
5174
+ if(!ISDIGIT(c)) break;
5175
+ nondigit = 0;
5176
+ tokadd(c);
5177
+ } while((c = nextc()) != -1);
5178
+ }
5179
+ pushback(c);
5180
+ tokfix();
5181
+ if(toklen() == start) {
5182
+ no_digits();
5183
+ } else if(nondigit) {
5184
+ goto trailing_uc;
5185
+ }
5186
+ suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5187
+ return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
5188
+ }
5189
+ if(c == '_') {
5190
+ /* 0_0 */
5191
+ goto octal_number;
5192
+ }
5193
+ if(c == 'o' || c == 'O') {
5194
+ /* prefixed octal */
5195
+ c = nextc();
5196
+ if(c == -1 || c == '_' || !ISDIGIT(c)) {
5197
+ no_digits();
5198
+ }
5199
+ }
5200
+ if(c >= '0' && c <= '7') {
5201
+ /* octal */
5202
+ octal_number:
5203
+ do {
5204
+ if(c == '_') {
5205
+ if(nondigit) break;
5206
+ nondigit = c;
5207
+ continue;
5208
+ }
5209
+ if(c < '0' || c > '9') break;
5210
+ if(c > '7') goto invalid_octal;
5211
+ nondigit = 0;
5212
+ tokadd(c);
5213
+ } while((c = nextc()) != -1);
5214
+ if(toklen() > start) {
5215
+ pushback(c);
5216
+ tokfix();
5217
+ if(nondigit) goto trailing_uc;
5218
+ suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5219
+ return set_integer_literal(rb_cstr_to_inum(tok(), 8, FALSE), suffix);
5220
+ }
5221
+ if(nondigit) {
5222
+ pushback(c);
5223
+ goto trailing_uc;
5224
+ }
5225
+ }
5226
+ if(c > '7' && c <= '9') {
5227
+ invalid_octal:
5228
+ yy_error("Invalid octal digit");
5229
+ } else if(c == '.' || c == 'e' || c == 'E') {
5230
+ tokadd('0');
5231
+ } else {
5232
+ pushback(c);
5233
+ suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5234
+ return set_integer_literal(INT2FIX(0), suffix);
5235
+ }
5236
+ }
5237
+
5238
+ for(;;) {
5239
+ switch(c) {
5240
+ case '0': case '1': case '2': case '3': case '4':
5241
+ case '5': case '6': case '7': case '8': case '9':
5242
+ nondigit = 0;
5243
+ tokadd(c);
5244
+ break;
5245
+
5246
+ case '.':
5247
+ if(nondigit) goto trailing_uc;
5248
+ if(seen_point || seen_e) {
5249
+ goto decode_num;
5250
+ } else {
5251
+ int c0 = nextc();
5252
+ if(c0 == -1 || !ISDIGIT(c0)) {
5253
+ pushback(c0);
5254
+ goto decode_num;
5255
+ }
5256
+ c = c0;
5257
+ }
5258
+ seen_point = toklen();
5259
+ tokadd('.');
5260
+ tokadd(c);
5261
+ is_float++;
5262
+ nondigit = 0;
5263
+ break;
5264
+
5265
+ case 'e':
5266
+ case 'E':
5267
+ if(nondigit) {
5268
+ pushback(c);
5269
+ c = nondigit;
5270
+ goto decode_num;
5271
+ }
5272
+ if(seen_e) {
5273
+ goto decode_num;
5274
+ }
5275
+ nondigit = c;
5276
+ c = nextc();
5277
+ if(c != '-' && c != '+' && !ISDIGIT(c)) {
5278
+ pushback(c);
5279
+ nondigit = 0;
5280
+ goto decode_num;
5281
+ }
5282
+ tokadd(nondigit);
5283
+ seen_e++;
5284
+ is_float++;
5285
+ tokadd(c);
5286
+ nondigit = (c == '-' || c == '+') ? c : 0;
5287
+ break;
5288
+
5289
+ case '_': /* `_' in number just ignored */
5290
+ if(nondigit) goto decode_num;
5291
+ nondigit = c;
5292
+ break;
5293
+
5294
+ default:
5295
+ goto decode_num;
5296
+ }
5297
+ c = nextc();
5298
+ }
5299
+
5300
+ decode_num:
5301
+ pushback(c);
5302
+ if(nondigit) {
5303
+ char tmp[30];
5304
+ trailing_uc:
5305
+ snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit);
5306
+ yy_error(tmp);
5307
+ }
5308
+ tokfix();
5309
+ if(is_float) {
5310
+ int type = tFLOAT;
5311
+ VALUE v;
5312
+
5313
+ suffix = number_literal_suffix(seen_e ? NUM_SUFFIX_I : NUM_SUFFIX_ALL);
5314
+ if(suffix & NUM_SUFFIX_R) {
5315
+ type = tRATIONAL;
5316
+ v = parse_rational(parser_state, tok(), toklen(), seen_point);
5317
+ } else {
5318
+ double d = strtod(tok(), 0);
5319
+ if(errno == ERANGE) {
5320
+ rb_warningS("Float %s out of range", tok());
5321
+ errno = 0;
5322
+ }
5323
+ v = rb_float_new(d);
5324
+ }
5325
+ return set_number_literal(v, type, suffix);
5326
+ }
5327
+
5328
+ suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5329
+ return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
5330
+ }
5331
+
5332
+ static int parse_qmark(rb_parser_state* parser_state) {
5333
+ rb_encoding *enc;
5334
+ int c;
5335
+
5336
+ if(IS_END()) {
5337
+ SET_LEX_STATE(EXPR_VALUE);
5338
+ return '?';
5339
+ }
5340
+ c = nextc();
5341
+ if(c == -1) {
5342
+ rb_compile_error(parser_state, "incomplete character syntax");
5343
+ return 0;
5344
+ }
5345
+ if(parser_enc_isspace(c, parser_state->enc)) {
5346
+ if(!IS_ARG()) {
5347
+ int c2 = 0;
5348
+ switch(c) {
5349
+ case ' ':
5350
+ c2 = 's';
5351
+ break;
5352
+ case '\n':
5353
+ c2 = 'n';
5354
+ break;
5355
+ case '\t':
5356
+ c2 = 't';
5357
+ break;
5358
+ case '\v':
5359
+ c2 = 'v';
5360
+ break;
5361
+ case '\r':
5362
+ c2 = 'r';
5363
+ break;
5364
+ case '\f':
5365
+ c2 = 'f';
5366
+ break;
5367
+ }
5368
+ if(c2) {
5369
+ rb_warn("invalid character syntax; use ?\\%c", c2);
5370
+ }
5371
+ }
5372
+ ternary:
5373
+ pushback(c);
5374
+ SET_LEX_STATE(EXPR_VALUE);
5375
+ return '?';
5376
+ }
5377
+ newtok();
5378
+ enc = parser_state->enc;
5379
+ if(!parser_isascii()) {
5380
+ if(tokadd_mbchar(c) == -1) return 0;
5381
+ } else if((parser_enc_isalnum(c, parser_state->enc) || c == '_') &&
5382
+ lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser_state->enc)) {
5383
+ goto ternary;
5384
+ } else if(c == '\\') {
5385
+ if(peek('u')) {
5386
+ nextc();
5387
+ c = parser_tokadd_utf8(parser_state, &enc, 0, 0, 0);
5388
+ if(0x80 <= c) {
5389
+ tokaddmbc(c, enc);
5390
+ } else {
5391
+ tokadd(c);
5392
+ }
5393
+ } else if(!lex_eol_p() && !(c = *lex_p, ISASCII(c))) {
5394
+ nextc();
5395
+ if(tokadd_mbchar(c) == -1) return 0;
5396
+ } else {
5397
+ c = read_escape(0, &enc);
5398
+ tokadd(c);
5399
+ }
5400
+ } else {
5401
+ tokadd(c);
5402
+ }
5403
+ tokfix();
5404
+ set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0));
5405
+ SET_LEX_STATE(EXPR_END);
5406
+ return tCHAR;
5407
+ }
5408
+
5409
+ static int
5410
+ parse_percent(rb_parser_state* parser_state, const int space_seen,
5411
+ const enum lex_state_e last_state)
5412
+ {
5413
+ int c;
5414
+
5415
+ if(IS_BEG()) {
5416
+ int term;
5417
+ int paren;
5418
+
5419
+ c = nextc();
5420
+ quotation:
5421
+ if(c == -1 || !ISALNUM(c)) {
5422
+ term = c;
5423
+ c = 'Q';
5424
+ } else {
5425
+ term = nextc();
5426
+ if(parser_enc_isalnum((int)term, parser_state->enc) || !parser_isascii()) {
5427
+ yy_error("unknown type of %string");
5428
+ return 0;
5429
+ }
5430
+ }
5431
+ if(c == -1 || term == -1) {
5432
+ rb_compile_error(parser_state, "unterminated quoted string meets end of file");
5433
+ return 0;
5434
+ }
5435
+ paren = term;
5436
+ if(term == '(') term = ')';
5437
+ else if(term == '[') term = ']';
5438
+ else if(term == '{') term = '}';
5439
+ else if(term == '<') term = '>';
5440
+ else paren = 0;
5441
+
5442
+ switch(c) {
5443
+ case 'Q':
5444
+ lex_strterm = NEW_STRTERM(str_dquote, term, paren);
5445
+ return tSTRING_BEG;
5446
+
5447
+ case 'q':
5448
+ lex_strterm = NEW_STRTERM(str_squote, term, paren);
5449
+ return tSTRING_BEG;
5450
+
5451
+ case 'W':
5452
+ lex_strterm = NEW_STRTERM(str_dword, term, paren);
5453
+ do {c = nextc();} while(ISSPACE(c));
5454
+ pushback(c);
5455
+ return tWORDS_BEG;
5456
+
5457
+ case 'w':
5458
+ lex_strterm = NEW_STRTERM(str_sword, term, paren);
5459
+ do {c = nextc();} while(ISSPACE(c));
5460
+ pushback(c);
5461
+ return tQWORDS_BEG;
5462
+
5463
+ case 'I':
5464
+ lex_strterm = NEW_STRTERM(str_dword, term, paren);
5465
+ do {c = nextc();} while(ISSPACE(c));
5466
+ pushback(c);
5467
+ return tSYMBOLS_BEG;
5468
+
5469
+ case 'i':
5470
+ lex_strterm = NEW_STRTERM(str_sword, term, paren);
5471
+ do {c = nextc();} while(ISSPACE(c));
5472
+ pushback(c);
5473
+ return tQSYMBOLS_BEG;
5474
+
5475
+ case 'x':
5476
+ lex_strterm = NEW_STRTERM(str_xquote, term, paren);
5477
+ return tXSTRING_BEG;
5478
+
5479
+ case 'r':
5480
+ lex_strterm = NEW_STRTERM(str_regexp, term, paren);
5481
+ return tREGEXP_BEG;
5482
+
5483
+ case 's':
5484
+ lex_strterm = NEW_STRTERM(str_ssym, term, paren);
5485
+ SET_LEX_STATE(EXPR_FNAME | EXPR_FITEM);
5486
+ return tSYMBEG;
5487
+
5488
+ default:
5489
+ yy_error("unknown type of %string");
5490
+ return 0;
5491
+ }
5492
+ }
5493
+ if((c = nextc()) == '=') {
5494
+ set_yylval_id('%');
5495
+ SET_LEX_STATE(EXPR_BEG);
5496
+ return tOP_ASGN;
5497
+ }
5498
+ if(IS_SPCARG(c) || (lex_state_p(EXPR_FITEM) && c == 's')) {
5499
+ goto quotation;
5500
+ }
5501
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
5502
+ pushback(c);
5503
+ warn_balanced("%%", "string literal");
5504
+ return '%';
5505
+ }
5506
+
5507
+ static int tokadd_ident(rb_parser_state* parser_state, int c) {
5508
+ do {
5509
+ if(tokadd_mbchar(c) == -1) return -1;
5510
+ c = nextc();
5511
+ } while(parser_is_identchar());
5512
+ pushback(c);
5513
+ return 0;
5514
+ }
5515
+
5516
+ static ID tokenize_ident(rb_parser_state* parser_state, const enum lex_state_e last_state) {
5517
+ ID ident = TOK_INTERN();
5518
+
5519
+ set_yylval_name(ident);
5520
+
5521
+ return ident;
5522
+ }
5523
+
5524
/*
 * Maps every byte value to its digit value in bases up to 36:
 * '0'..'9' -> 0..9, 'A'..'Z' and 'a'..'z' -> 10..35, anything else -> -1.
 */
const signed char ruby_digit36_to_number_table[] = {
  /*     0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
  /*0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*1*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*2*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*3*/  0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
  /*4*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
  /*5*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
  /*6*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
  /*7*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
  /*8*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*9*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*a*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*b*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*c*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*d*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*e*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  /*f*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
};

/*
 * Scan a run of digits in the given base from the start of str.
 *
 *   str      - text to scan.
 *   len      - maximum number of bytes to look at; a negative value means
 *              "keep going until a byte that is not a digit in this base"
 *              (so the text must then be terminated by such a byte).
 *   base     - radix; a digit is accepted only if its value is below base.
 *   retlen   - out: number of bytes consumed.
 *   overflow - out: 1 if the value did not fit in unsigned long, else 0.
 *
 * Returns the accumulated value; on overflow the value has wrapped and
 * only *overflow is meaningful.
 *
 * A base <= 0 accepts no digit at all, so the function reports an empty
 * scan.  The explicit check also keeps base == 0 from reaching the division
 * below, which would otherwise be a division by zero.
 */
unsigned long
ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow)
{
  const char *start = str;
  unsigned long ret = 0, x;
  unsigned long mul_overflow;

  *overflow = 0;

  if(!len || base <= 0) {
    *retlen = 0;
    return 0;
  }

  /* Largest value that can still be multiplied by base without wrapping. */
  mul_overflow = (~(unsigned long)0) / base;

  do {
    int d = ruby_digit36_to_number_table[(unsigned char)*str++];
    if(d == -1 || base <= d) {
      /* not a digit in this base: un-consume it and stop */
      --str;
      break;
    }
    if(mul_overflow < ret) {
      *overflow = 1;
    }
    ret *= base;
    x = ret;
    ret += d;
    if(ret < x) {
      /* the addition wrapped around */
      *overflow = 1;
    }
  } while(len < 0 || --len);

  *retlen = str - start;
  return ret;
}
5578
+
5579
+ static int parse_numvar(rb_parser_state* parser_state) {
5580
+ size_t len;
5581
+ int overflow;
5582
+ unsigned long n = ruby_scan_digits(tok()+1, toklen()-1, 10, &len, &overflow);
5583
+ const unsigned long nth_ref_max =
5584
+ ((FIXNUM_MAX < INT_MAX) ? FIXNUM_MAX : INT_MAX) >> 1;
5585
+ /* NTH_REF is left-shifted to be ORed with back-ref flag and
5586
+ * turned into a Fixnum, in compile.c */
5587
+
5588
+ if(overflow || n > nth_ref_max) {
5589
+ /* compile_error()? */
5590
+ rb_warnS("`%s' is too big for a number variable, always nil", tok());
5591
+ return 0; /* $0 is $PROGRAM_NAME, not NTH_REF */
5592
+ } else {
5593
+ return (int)n;
5594
+ }
5595
+ }
5596
+
5597
+ static int parse_gvar(rb_parser_state* parser_state, const enum lex_state_e last_state) {
5598
+ int c;
5599
+
5600
+ SET_LEX_STATE(EXPR_END);
5601
+ newtok();
5602
+ c = nextc();
5603
+ switch(c) {
5604
+ case '_': /* $_: last read line string */
5605
+ c = nextc();
5606
+ if(parser_is_identchar()) {
5607
+ tokadd('$');
5608
+ tokadd('_');
5609
+ break;
5610
+ }
5611
+ pushback(c);
5612
+ c = '_';
5613
+ /* fall through */
5614
+ case '~': /* $~: match-data */
5615
+ case '*': /* $*: argv */
5616
+ case '$': /* $$: pid */
5617
+ case '?': /* $?: last status */
5618
+ case '!': /* $!: error string */
5619
+ case '@': /* $@: error position */
5620
+ case '/': /* $/: input record separator */
5621
+ case '\\': /* $\: output record separator */
5622
+ case ';': /* $;: field separator */
5623
+ case ',': /* $,: output field separator */
5624
+ case '.': /* $.: last read line number */
5625
+ case '=': /* $=: ignorecase */
5626
+ case ':': /* $:: load path */
5627
+ case '<': /* $<: reading filename */
5628
+ case '>': /* $>: default output handle */
5629
+ case '\"': /* $": already loaded files */
5630
+ tokadd('$');
5631
+ tokadd(c);
5632
+ goto gvar;
5633
+
5634
+ case '-':
5635
+ tokadd('$');
5636
+ tokadd(c);
5637
+ c = nextc();
5638
+ if(parser_is_identchar()) {
5639
+ if(tokadd_mbchar(c) == -1) return 0;
5640
+ } else {
5641
+ pushback(c);
5642
+ pushback('-');
5643
+ return '$';
5644
+ }
5645
+ gvar:
5646
+ tokfix();
5647
+ set_yylval_name(TOK_INTERN());
5648
+ return tGVAR;
5649
+
5650
+ case '&': /* $&: last match */
5651
+ case '`': /* $`: string before last match */
5652
+ case '\'': /* $': string after last match */
5653
+ case '+': /* $+: string matches last paren. */
5654
+ if(lex_state_of_p(last_state, EXPR_FNAME)) {
5655
+ tokadd('$');
5656
+ tokadd(c);
5657
+ goto gvar;
5658
+ }
5659
+ set_yylval_node(NEW_BACK_REF(c));
5660
+ return tBACK_REF;
5661
+
5662
+ case '1': case '2': case '3':
5663
+ case '4': case '5': case '6':
5664
+ case '7': case '8': case '9':
5665
+ tokadd('$');
5666
+ do {
5667
+ tokadd(c);
5668
+ c = nextc();
5669
+ } while(c != -1 && ISDIGIT(c));
5670
+ pushback(c);
5671
+ if(lex_state_of_p(last_state, EXPR_FNAME)) goto gvar;
5672
+ tokfix();
5673
+ set_yylval_node(NEW_NTH_REF(parse_numvar(parser_state)));
5674
+ return tNTH_REF;
5675
+
5676
+ default:
5677
+ if(!parser_is_identchar()) {
5678
+ if(c == -1 || ISSPACE(c)) {
5679
+ rb_compile_error(parser_state,
5680
+ "`$' without identifiers is not allowed as a global variable name");
5681
+ } else {
5682
+ pushback(c);
5683
+ rb_compile_error(parser_state, "`$%c' is not allowed as a global variable name", c);
5684
+ }
5685
+ return 0;
5686
+ }
5687
+ case '0':
5688
+ tokadd('$');
5689
+ }
5690
+
5691
+ if(tokadd_ident(parser_state, c)) return 0;
5692
+ SET_LEX_STATE(EXPR_END);
5693
+ tokenize_ident(parser_state, last_state);
5694
+ return tGVAR;
5695
+ }
5696
+
5697
+ static int
5698
+ parse_atmark(rb_parser_state* parser_state, const enum lex_state_e last_state)
5699
+ {
5700
+ int result = tIVAR;
5701
+ int c = nextc();
5702
+
5703
+ newtok();
5704
+ tokadd('@');
5705
+ if(c == '@') {
5706
+ result = tCVAR;
5707
+ tokadd('@');
5708
+ c = nextc();
5709
+ }
5710
+ if(c == -1 || ISSPACE(c)) {
5711
+ if(result == tIVAR) {
5712
+ rb_compile_error(parser_state,
5713
+ "`@' without identifiers is not allowed as an instance variable name");
5714
+ } else {
5715
+ rb_compile_error(parser_state,
5716
+ "`@@' without identifiers is not allowed as a class variable name");
5717
+ }
5718
+ return 0;
5719
+ } else if(ISDIGIT(c) || !parser_is_identchar()) {
5720
+ pushback(c);
5721
+ if(result == tIVAR) {
5722
+ rb_compile_error(parser_state,
5723
+ "`@%c' is not allowed as an instance variable name", c);
5724
+ } else {
5725
+ rb_compile_error(parser_state,
5726
+ "`@@%c' is not allowed as a class variable name", c);
5727
+ }
5728
+ return 0;
5729
+ }
5730
+
5731
+ if(tokadd_ident(parser_state, c)) return 0;
5732
+ SET_LEX_STATE(EXPR_END);
5733
+ tokenize_ident(parser_state, last_state);
5734
+ return result;
5735
+ }
5736
+
5737
+ static int parse_ident(rb_parser_state* parser_state, int c, int cmd_state) {
5738
+ int result = 0;
5739
+ int mb = ENC_CODERANGE_7BIT;
5740
+ const enum lex_state_e last_state = lex_state;
5741
+ ID ident;
5742
+
5743
+ do {
5744
+ if(!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
5745
+ if(tokadd_mbchar(c) == -1) return 0;
5746
+ c = nextc();
5747
+ } while(parser_is_identchar());
5748
+ if((c == '!' || c == '?') && !peek('=')) {
5749
+ tokadd(c);
5750
+ } else {
5751
+ pushback(c);
5752
+ }
5753
+ tokfix();
5754
+
5755
+ if(toklast() == '!' || toklast() == '?') {
5756
+ result = tFID;
5757
+ } else {
5758
+ if(lex_state_p(EXPR_FNAME)) {
5759
+ int c = nextc();
5760
+ if(c == '=' && !peek('~') && !peek('>') &&
5761
+ (!peek('=') || (peek_n('>', 1)))) {
5762
+ result = tIDENTIFIER;
5763
+ tokadd(c);
5764
+ tokfix();
5765
+ } else {
5766
+ pushback(c);
5767
+ }
5768
+ }
5769
+ if(result == 0 && ISUPPER(tok()[0])) {
5770
+ result = tCONSTANT;
5771
+ } else {
5772
+ result = tIDENTIFIER;
5773
+ }
5774
+ }
5775
+
5776
+ if(IS_LABEL_POSSIBLE()) {
5777
+ if(IS_LABEL_SUFFIX(0)) {
5778
+ SET_LEX_STATE(EXPR_ARG | EXPR_LABELED);
5779
+ nextc();
5780
+ set_yylval_name(TOK_INTERN());
5781
+ return tLABEL;
5782
+ }
5783
+ }
5784
+ if(mb == ENC_CODERANGE_7BIT && !lex_state_p(EXPR_DOT)) {
5785
+ const struct kwtable *kw;
5786
+
5787
+ /* See if it is a reserved word. */
5788
+ kw = rb_reserved_word(tok(), toklen());
5789
+ if(kw) {
5790
+ enum lex_state_e state = lex_state;
5791
+ SET_LEX_STATE(kw->state);
5792
+ if(lex_state_of_p(state, EXPR_FNAME)) {
5793
+ set_yylval_name(parser_intern2(tok(), toklen()));
5794
+ return kw->id[0];
5795
+ }
5796
+ if(lex_state_p(EXPR_BEG)) {
5797
+ command_start = TRUE;
5798
+ }
5799
+ if(kw->id[0] == keyword_do) {
5800
+ if(lpar_beg && lpar_beg == paren_nest) {
5801
+ lpar_beg = 0;
5802
+ --paren_nest;
5803
+ return keyword_do_LAMBDA;
5804
+ }
5805
+ if(COND_P()) return keyword_do_cond;
5806
+ if(CMDARG_P() && !lex_state_of_p(state, EXPR_CMDARG)) {
5807
+ return keyword_do_block;
5808
+ }
5809
+ if(lex_state_of_p(state, (EXPR_BEG | EXPR_ENDARG))) {
5810
+ return keyword_do_block;
5811
+ }
5812
+ return keyword_do;
5813
+ }
5814
+ if(lex_state_of_p(state, (EXPR_BEG | EXPR_LABELED))) {
5815
+ return kw->id[0];
5816
+ } else {
5817
+ if(kw->id[0] != kw->id[1]) {
5818
+ SET_LEX_STATE(EXPR_BEG | EXPR_LABEL);
5819
+ }
5820
+ return kw->id[1];
5821
+ }
5822
+ }
5823
+ }
5824
+
5825
+ if(lex_state_p(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) {
5826
+ if(cmd_state) {
5827
+ SET_LEX_STATE(EXPR_CMDARG);
5828
+ } else {
5829
+ SET_LEX_STATE(EXPR_ARG);
5830
+ }
5831
+ } else if(lex_state == EXPR_FNAME) {
5832
+ SET_LEX_STATE(EXPR_ENDFN);
5833
+ } else {
5834
+ SET_LEX_STATE(EXPR_END);
5835
+ }
5836
+
5837
+ ident = tokenize_ident(parser_state, last_state);
5838
+ if(!lex_state_of_p(last_state, EXPR_DOT | EXPR_FNAME) &&
5839
+ (result == tIDENTIFIER) && /* not EXPR_FNAME, not attrasgn */
5840
+ lvar_defined(ident)) {
5841
+ SET_LEX_STATE(EXPR_END | EXPR_LABEL);
5842
+ }
5843
+
5844
+ return result;
5845
+ }
5846
+
5847
+ static int parser_yylex(rb_parser_state* parser_state) {
5848
+ int c;
5849
+ int space_seen = 0;
5850
+ int cmd_state;
5851
+ int label;
5852
+ enum lex_state_e last_state;
5853
+ int fallthru = FALSE;
5854
+ int tok_seen = token_seen;
5855
+
5856
+ if(lex_strterm) {
5857
+ int token;
5858
+ if(nd_type(lex_strterm) == NODE_HEREDOC) {
5859
+ token = here_document(lex_strterm);
5860
+ if(token == tSTRING_END) {
5861
+ lex_strterm = 0;
5862
+ SET_LEX_STATE(EXPR_END);
5863
+ }
5864
+ } else {
5865
+ token = parse_string(lex_strterm);
5866
+ if(token == tSTRING_END && (lex_strterm->nd_func & STR_FUNC_LABEL)) {
4882
5867
  if(((lex_state_p(EXPR_BEG | EXPR_ENDFN) && !COND_P()) || IS_ARG()) && IS_LABEL_SUFFIX(0)) {
4883
5868
  nextc();
4884
5869
  token = tLABEL_END;
@@ -4886,7 +5871,7 @@ parser_yylex(rb_parser_state *parser_state)
4886
5871
  }
4887
5872
  if(token == tSTRING_END || token == tREGEXP_END || token == tLABEL_END) {
4888
5873
  lex_strterm = 0;
4889
- lex_state = token == tLABEL_END ? EXPR_BEG : EXPR_END;
5874
+ SET_LEX_STATE(token == tLABEL_END ? EXPR_BEG|EXPR_LABEL : EXPR_END);
4890
5875
  }
4891
5876
  }
4892
5877
  return token;
@@ -4894,6 +5879,7 @@ parser_yylex(rb_parser_state *parser_state)
4894
5879
 
4895
5880
  cmd_state = command_start;
4896
5881
  command_start = FALSE;
5882
+ token_seen = TRUE;
4897
5883
  retry:
4898
5884
  last_state = lex_state;
4899
5885
  switch(c = nextc()) {
@@ -4910,6 +5896,7 @@ retry:
4910
5896
  goto retry;
4911
5897
 
4912
5898
  case '#': /* it's a comment */
5899
+ token_seen = tok_seen;
4913
5900
  /* no magic_comment in shebang line */
4914
5901
  if(!parser_magic_comment(parser_state, lex_p, lex_pend - lex_p)) {
4915
5902
  if(comment_at_top(parser_state)) {
@@ -4918,9 +5905,17 @@ retry:
4918
5905
  }
4919
5906
 
4920
5907
  lex_p = lex_pend;
5908
+ fallthru = TRUE;
4921
5909
  /* fall through */
4922
5910
  case '\n':
4923
- if(lex_state_p(EXPR_BEG | EXPR_VALUE | EXPR_CLASS | EXPR_FNAME | EXPR_DOT)) {
5911
+ token_seen = tok_seen;
5912
+ c = (lex_state_p(EXPR_BEG | EXPR_CLASS | EXPR_FNAME | EXPR_DOT)
5913
+ && !lex_state_p(EXPR_LABELED));
5914
+ if(c || lex_state_all_p(EXPR_ARG | EXPR_LABELED)) {
5915
+ fallthru = FALSE;
5916
+ if(!c && in_kwarg) {
5917
+ goto normal_newline;
5918
+ }
4924
5919
  goto retry;
4925
5920
  }
4926
5921
 
@@ -4930,10 +5925,10 @@ retry:
4930
5925
  case '\13': /* '\v' */
4931
5926
  space_seen = 1;
4932
5927
  break;
5928
+ case '&':
4933
5929
  case '.': {
4934
- if((c = nextc()) != '.') {
5930
+ if(peek('.') == (c == '&')) {
4935
5931
  pushback(c);
4936
- pushback('.');
4937
5932
  goto retry;
4938
5933
  }
4939
5934
  }
@@ -4948,21 +5943,21 @@ retry:
4948
5943
 
4949
5944
  normal_newline:
4950
5945
  command_start = TRUE;
4951
- lex_state = EXPR_BEG;
5946
+ SET_LEX_STATE(EXPR_BEG);
4952
5947
  return '\n';
4953
5948
 
4954
5949
  case '*':
4955
5950
  if((c = nextc()) == '*') {
4956
5951
  if((c = nextc()) == '=') {
4957
5952
  set_yylval_id(tPOW);
4958
- lex_state = EXPR_BEG;
5953
+ SET_LEX_STATE(EXPR_BEG);
4959
5954
  return tOP_ASGN;
4960
5955
  }
4961
5956
  pushback(c);
4962
- if (IS_SPCARG(c)) {
5957
+ if(IS_SPCARG(c)) {
4963
5958
  rb_warning0("`**' interpreted as argument prefix");
4964
5959
  c = tDSTAR;
4965
- } else if (IS_BEG()) {
5960
+ } else if(IS_BEG()) {
4966
5961
  c = tDSTAR;
4967
5962
  } else {
4968
5963
  warn_balanced("**", "argument prefix");
@@ -4971,7 +5966,7 @@ retry:
4971
5966
  } else {
4972
5967
  if(c == '=') {
4973
5968
  set_yylval_id('*');
4974
- lex_state = EXPR_BEG;
5969
+ SET_LEX_STATE(EXPR_BEG);
4975
5970
  return tOP_ASGN;
4976
5971
  }
4977
5972
  pushback(c);
@@ -4985,18 +5980,18 @@ retry:
4985
5980
  c = '*';
4986
5981
  }
4987
5982
  }
4988
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
5983
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
4989
5984
  return c;
4990
5985
 
4991
5986
  case '!':
4992
5987
  c = nextc();
4993
5988
  if(IS_AFTER_OPERATOR()) {
4994
- lex_state = EXPR_ARG;
5989
+ SET_LEX_STATE(EXPR_ARG);
4995
5990
  if(c == '@') {
4996
5991
  return '!';
4997
5992
  }
4998
5993
  } else {
4999
- lex_state = EXPR_BEG;
5994
+ SET_LEX_STATE(EXPR_BEG);
5000
5995
  }
5001
5996
  if(c == '=') {
5002
5997
  return tNEQ;
@@ -5011,6 +6006,7 @@ retry:
5011
6006
  if(was_bol()) {
5012
6007
  /* skip embedded rd document */
5013
6008
  if(strncmp(lex_p, "begin", 5) == 0 && ISSPACE(lex_p[5])) {
6009
+ lex_goto_eol(parser_state);
5014
6010
  for(;;) {
5015
6011
  lex_goto_eol(parser_state);
5016
6012
  c = nextc();
@@ -5019,7 +6015,7 @@ retry:
5019
6015
  return 0;
5020
6016
  }
5021
6017
  if(c != '=') continue;
5022
- if(strncmp(lex_p, "end", 3) == 0 &&
6018
+ if(c == '=' && strncmp(lex_p, "end", 3) == 0 &&
5023
6019
  (lex_p + 3 == lex_pend || ISSPACE(lex_p[3]))) {
5024
6020
  break;
5025
6021
  }
@@ -5029,7 +6025,7 @@ retry:
5029
6025
  }
5030
6026
  }
5031
6027
 
5032
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
6028
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
5033
6029
  if((c = nextc()) == '=') {
5034
6030
  if((c = nextc()) == '=') {
5035
6031
  return tEQQ;
@@ -5051,17 +6047,17 @@ retry:
5051
6047
  if(c == '<' &&
5052
6048
  !lex_state_p(EXPR_DOT | EXPR_CLASS) &&
5053
6049
  !IS_END() &&
5054
- (!IS_ARG() || space_seen)) {
6050
+ (!IS_ARG() || lex_state_p(EXPR_LABELED) || space_seen)) {
5055
6051
  int token = heredoc_identifier();
5056
6052
  if(token) return token;
5057
6053
  }
5058
6054
  if(IS_AFTER_OPERATOR()) {
5059
- lex_state = EXPR_ARG;
6055
+ SET_LEX_STATE(EXPR_ARG);
5060
6056
  } else {
5061
6057
  if(lex_state_p(EXPR_CLASS)) {
5062
6058
  command_start = TRUE;
5063
6059
  }
5064
- lex_state = EXPR_BEG;
6060
+ SET_LEX_STATE(EXPR_BEG);
5065
6061
  }
5066
6062
  if(c == '=') {
5067
6063
  if((c = nextc()) == '>') {
@@ -5073,7 +6069,7 @@ retry:
5073
6069
  if(c == '<') {
5074
6070
  if((c = nextc()) == '=') {
5075
6071
  set_yylval_id(tLSHFT);
5076
- lex_state = EXPR_BEG;
6072
+ SET_LEX_STATE(EXPR_BEG);
5077
6073
  return tOP_ASGN;
5078
6074
  }
5079
6075
  pushback(c);
@@ -5084,14 +6080,14 @@ retry:
5084
6080
  return '<';
5085
6081
 
5086
6082
  case '>':
5087
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
6083
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
5088
6084
  if((c = nextc()) == '=') {
5089
6085
  return tGEQ;
5090
6086
  }
5091
6087
  if(c == '>') {
5092
6088
  if((c = nextc()) == '=') {
5093
6089
  set_yylval_id(tRSHFT);
5094
- lex_state = EXPR_BEG;
6090
+ SET_LEX_STATE(EXPR_BEG);
5095
6091
  return tOP_ASGN;
5096
6092
  }
5097
6093
  pushback(c);
@@ -5107,14 +6103,14 @@ retry:
5107
6103
 
5108
6104
  case '`':
5109
6105
  if(lex_state_p(EXPR_FNAME)) {
5110
- lex_state = EXPR_ENDFN;
6106
+ SET_LEX_STATE(EXPR_ENDFN);
5111
6107
  return c;
5112
6108
  }
5113
6109
  if(lex_state_p(EXPR_DOT)) {
5114
6110
  if(cmd_state) {
5115
- lex_state = EXPR_CMDARG;
6111
+ SET_LEX_STATE(EXPR_CMDARG);
5116
6112
  } else {
5117
- lex_state = EXPR_ARG;
6113
+ SET_LEX_STATE(EXPR_ARG);
5118
6114
  }
5119
6115
  return c;
5120
6116
  }
@@ -5127,93 +6123,25 @@ retry:
5127
6123
  return tSTRING_BEG;
5128
6124
 
5129
6125
  case '?':
5130
- if(IS_END()) {
5131
- lex_state = EXPR_VALUE;
5132
- return '?';
5133
- }
5134
- c = nextc();
5135
- if(c == -1) {
5136
- rb_compile_error(parser_state, "incomplete character syntax");
5137
- return 0;
5138
- }
5139
- if(parser_enc_isspace(c, parser_state->enc)) {
5140
- if(!IS_ARG()){
5141
- int c2 = 0;
5142
- switch(c) {
5143
- case ' ':
5144
- c2 = 's';
5145
- break;
5146
- case '\n':
5147
- c2 = 'n';
5148
- break;
5149
- case '\t':
5150
- c2 = 't';
5151
- break;
5152
- case '\v':
5153
- c2 = 'v';
5154
- break;
5155
- case '\r':
5156
- c2 = 'r';
5157
- break;
5158
- case '\f':
5159
- c2 = 'f';
5160
- break;
5161
- }
5162
- if(c2) {
5163
- rb_warn("invalid character syntax; use ?\\%c", c2);
5164
- }
5165
- }
5166
- ternary:
5167
- pushback(c);
5168
- lex_state = EXPR_VALUE;
5169
- return '?';
5170
- }
5171
-
5172
- newtok();
5173
- enc = parser_state->enc;
5174
- if(!parser_isascii()) {
5175
- if(tokadd_mbchar(c) == -1) return 0;
5176
- } else if((parser_enc_isalnum(c, parser_state->enc) || c == '_') &&
5177
- lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser_state->enc)) {
5178
- goto ternary;
5179
- } else if(c == '\\') {
5180
- if(peek('u')) {
5181
- nextc();
5182
- c = parser_tokadd_utf8(parser_state, &enc, 0, 0, 0);
5183
- if(0x80 <= c) {
5184
- tokaddmbc(c, enc);
5185
- } else {
5186
- tokadd(c);
5187
- }
5188
- } else if(!lex_eol_p() && !(c = *lex_p, ISASCII(c))) {
5189
- nextc();
5190
- if(tokadd_mbchar(c) == -1) return 0;
5191
- } else {
5192
- c = read_escape(0, &enc);
5193
- tokadd(c);
5194
- }
5195
- } else {
5196
- tokadd(c);
5197
- }
5198
- tokfix();
5199
- set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0));
5200
- lex_state = EXPR_END;
5201
- return tCHAR;
6126
+ return parse_qmark(parser_state);
5202
6127
 
5203
6128
  case '&':
5204
6129
  if((c = nextc()) == '&') {
5205
- lex_state = EXPR_BEG;
6130
+ SET_LEX_STATE(EXPR_BEG);
5206
6131
  if((c = nextc()) == '=') {
5207
6132
  set_yylval_id(tANDOP);
5208
- lex_state = EXPR_BEG;
6133
+ SET_LEX_STATE(EXPR_BEG);
5209
6134
  return tOP_ASGN;
5210
6135
  }
5211
6136
  pushback(c);
5212
6137
  return tANDOP;
5213
6138
  } else if(c == '=') {
5214
6139
  set_yylval_id('&');
5215
- lex_state = EXPR_BEG;
6140
+ SET_LEX_STATE(EXPR_BEG);
5216
6141
  return tOP_ASGN;
6142
+ } else if(c == '.') {
6143
+ SET_LEX_STATE(EXPR_DOT);
6144
+ return tANDDOT;
5217
6145
  }
5218
6146
  pushback(c);
5219
6147
  if(IS_SPCARG(c)){
@@ -5225,15 +6153,15 @@ retry:
5225
6153
  warn_balanced("&", "argument prefix");
5226
6154
  c = '&';
5227
6155
  }
5228
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
6156
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
5229
6157
  return c;
5230
6158
 
5231
6159
  case '|':
5232
6160
  if((c = nextc()) == '|') {
5233
- lex_state = EXPR_BEG;
6161
+ SET_LEX_STATE(EXPR_BEG);
5234
6162
  if((c = nextc()) == '=') {
5235
6163
  set_yylval_id(tOROP);
5236
- lex_state = EXPR_BEG;
6164
+ SET_LEX_STATE(EXPR_BEG);
5237
6165
  return tOP_ASGN;
5238
6166
  }
5239
6167
  pushback(c);
@@ -5241,335 +6169,92 @@ retry:
5241
6169
  }
5242
6170
  if(c == '=') {
5243
6171
  set_yylval_id('|');
5244
- lex_state = EXPR_BEG;
6172
+ SET_LEX_STATE(EXPR_BEG);
5245
6173
  return tOP_ASGN;
5246
6174
  }
5247
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
6175
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG|EXPR_LABEL);
5248
6176
  pushback(c);
5249
6177
  return '|';
5250
6178
 
5251
- case '+':
5252
- c = nextc();
5253
- if(IS_AFTER_OPERATOR()) {
5254
- lex_state = EXPR_ARG;
5255
- if(c == '@') {
5256
- return tUPLUS;
5257
- }
5258
- pushback(c);
5259
- return '+';
5260
- }
5261
- if(c == '=') {
5262
- set_yylval_id('+');
5263
- lex_state = EXPR_BEG;
5264
- return tOP_ASGN;
5265
- }
5266
- if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) {
5267
- lex_state = EXPR_BEG;
5268
- pushback(c);
5269
- if(c != -1 && ISDIGIT(c)) {
5270
- c = '+';
5271
- goto start_num;
5272
- }
5273
- return tUPLUS;
5274
- }
5275
- lex_state = EXPR_BEG;
5276
- pushback(c);
5277
- warn_balanced("+", "unary operator");
5278
- return '+';
5279
-
5280
- case '-':
5281
- c = nextc();
5282
- if(IS_AFTER_OPERATOR()) {
5283
- lex_state = EXPR_ARG;
5284
- if(c == '@') {
5285
- return tUMINUS;
5286
- }
5287
- pushback(c);
5288
- return '-';
5289
- }
5290
- if(c == '=') {
5291
- set_yylval_id('-');
5292
- lex_state = EXPR_BEG;
5293
- return tOP_ASGN;
5294
- }
5295
- if(c == '>') {
5296
- lex_state = EXPR_ENDFN;
5297
- return tLAMBDA;
5298
- }
5299
- if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) {
5300
- lex_state = EXPR_BEG;
5301
- pushback(c);
5302
- if(c != -1 && ISDIGIT(c)) {
5303
- return tUMINUS_NUM;
5304
- }
5305
- return tUMINUS;
5306
- }
5307
- lex_state = EXPR_BEG;
5308
- pushback(c);
5309
- warn_balanced("-", "unary operator");
5310
- return '-';
5311
-
5312
- case '.':
5313
- lex_state = EXPR_BEG;
5314
- if((c = nextc()) == '.') {
5315
- if((c = nextc()) == '.') {
5316
- return tDOT3;
5317
- }
5318
- pushback(c);
5319
- return tDOT2;
5320
- }
5321
- pushback(c);
5322
- if(c != -1 && ISDIGIT(c)) {
5323
- yy_error("no .<digit> floating literal anymore; put 0 before dot");
5324
- }
5325
- lex_state = EXPR_DOT;
5326
- return '.';
5327
-
5328
- start_num:
5329
- case '0': case '1': case '2': case '3': case '4':
5330
- case '5': case '6': case '7': case '8': case '9':
5331
- {
5332
- int is_float, seen_point, seen_e, nondigit, suffix;
5333
-
5334
- is_float = seen_point = seen_e = nondigit = 0;
5335
- lex_state = EXPR_END;
5336
- newtok();
5337
- if(c == '-' || c == '+') {
5338
- tokadd(c);
5339
- c = nextc();
5340
- }
5341
- if(c == '0') {
5342
- #define no_digits() do {yy_error("numeric literal without digits"); return 0;} while(0)
5343
- int start = toklen();
5344
- c = nextc();
5345
- if(c == 'x' || c == 'X') {
5346
- /* hexadecimal */
5347
- c = nextc();
5348
- if(c != -1 && ISXDIGIT(c)) {
5349
- do {
5350
- if(c == '_') {
5351
- if(nondigit) break;
5352
- nondigit = c;
5353
- continue;
5354
- }
5355
- if(!ISXDIGIT(c)) break;
5356
- nondigit = 0;
5357
- tokadd(c);
5358
- } while((c = nextc()) != -1);
5359
- }
5360
- pushback(c);
5361
- tokfix();
5362
- if(toklen() == start) {
5363
- no_digits();
5364
- } else if(nondigit) {
5365
- goto trailing_uc;
5366
- }
5367
- suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5368
- return set_integer_literal(rb_cstr_to_inum(tok(), 16, FALSE), suffix);
5369
- }
5370
-
5371
- if(c == 'b' || c == 'B') {
5372
- /* binary */
5373
- c = nextc();
5374
- if(c == '0' || c == '1') {
5375
- do {
5376
- if(c == '_') {
5377
- if(nondigit) break;
5378
- nondigit = c;
5379
- continue;
5380
- }
5381
- if(c != '0' && c != '1') break;
5382
- nondigit = 0;
5383
- tokadd(c);
5384
- } while((c = nextc()) != -1);
5385
- }
5386
- pushback(c);
5387
- tokfix();
5388
- if(toklen() == start) {
5389
- no_digits();
5390
- } else if(nondigit) {
5391
- goto trailing_uc;
5392
- }
5393
- suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5394
- return set_integer_literal(rb_cstr_to_inum(tok(), 2, FALSE), suffix);
5395
- }
5396
-
5397
- if(c == 'd' || c == 'D') {
5398
- /* decimal */
5399
- c = nextc();
5400
- if(c != -1 && ISDIGIT(c)) {
5401
- do {
5402
- if(c == '_') {
5403
- if(nondigit) break;
5404
- nondigit = c;
5405
- continue;
5406
- }
5407
- if(!ISDIGIT(c)) break;
5408
- nondigit = 0;
5409
- tokadd(c);
5410
- } while((c = nextc()) != -1);
5411
- }
5412
- pushback(c);
5413
- tokfix();
5414
- if(toklen() == start) {
5415
- no_digits();
5416
- } else if(nondigit) {
5417
- goto trailing_uc;
5418
- }
5419
- suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5420
- return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
5421
- }
5422
-
5423
- if(c == '_') {
5424
- /* 0_0 */
5425
- goto octal_number;
5426
- }
5427
-
5428
- if(c == 'o' || c == 'O') {
5429
- /* prefixed octal */
5430
- c = nextc();
5431
- if(c == -1 || c == '_' || !ISDIGIT(c)) {
5432
- no_digits();
5433
- }
5434
- }
5435
-
5436
- if(c >= '0' && c <= '7') {
5437
- /* octal */
5438
- octal_number:
5439
- do {
5440
- if(c == '_') {
5441
- if(nondigit) break;
5442
- nondigit = c;
5443
- continue;
5444
- }
5445
- if(c < '0' || c > '9') break;
5446
- if(c > '7') goto invalid_octal;
5447
- nondigit = 0;
5448
- tokadd(c);
5449
- } while((c = nextc()) != -1);
5450
-
5451
- if(toklen() > start) {
5452
- pushback(c);
5453
- tokfix();
5454
- if(nondigit) goto trailing_uc;
5455
- suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5456
- return set_integer_literal(rb_cstr_to_inum(tok(), 8, FALSE), suffix);
5457
- }
5458
- if(nondigit) {
5459
- pushback(c);
5460
- goto trailing_uc;
5461
- }
5462
- }
5463
-
5464
- if(c > '7' && c <= '9') {
5465
- invalid_octal:
5466
- yy_error("Invalid octal digit");
5467
- } else if(c == '.' || c == 'e' || c == 'E') {
5468
- tokadd('0');
5469
- } else {
5470
- pushback(c);
5471
- suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5472
- return set_integer_literal(INT2FIX(0), suffix);
5473
- }
5474
- }
5475
-
5476
- for(;;) {
5477
- switch(c) {
5478
- case '0': case '1': case '2': case '3': case '4':
5479
- case '5': case '6': case '7': case '8': case '9':
5480
- nondigit = 0;
5481
- tokadd(c);
5482
- break;
5483
-
5484
- case '.':
5485
- if(nondigit) goto trailing_uc;
5486
- if(seen_point || seen_e) {
5487
- goto decode_num;
5488
- } else {
5489
- int c0 = nextc();
5490
- if(c0 == -1 || !ISDIGIT(c0)) {
5491
- pushback(c0);
5492
- goto decode_num;
5493
- }
5494
- c = c0;
5495
- }
5496
- seen_point = toklen();
5497
- tokadd('.');
5498
- tokadd(c);
5499
- is_float++;
5500
- nondigit = 0;
5501
- break;
5502
-
5503
- case 'e':
5504
- case 'E':
5505
- if(nondigit) {
5506
- pushback(c);
5507
- c = nondigit;
5508
- goto decode_num;
5509
- }
5510
- if(seen_e) {
5511
- goto decode_num;
5512
- }
5513
- nondigit = c;
5514
- c = nextc();
5515
- if(c != '-' && c != '+' && !ISDIGIT(c)) {
5516
- pushback(c);
5517
- nondigit = 0;
5518
- goto decode_num;
5519
- }
5520
- tokadd(nondigit);
5521
- seen_e++;
5522
- is_float++;
5523
- tokadd(c);
5524
- nondigit = (c == '-' || c == '+') ? c : 0;
5525
- break;
5526
-
5527
- case '_': /* `_' in number just ignored */
5528
- if(nondigit) goto decode_num;
5529
- nondigit = c;
5530
- break;
5531
-
5532
- default:
5533
- goto decode_num;
6179
+ case '+':
6180
+ c = nextc();
6181
+ if(IS_AFTER_OPERATOR()) {
6182
+ SET_LEX_STATE(EXPR_ARG);
6183
+ if(c == '@') {
6184
+ return tUPLUS;
5534
6185
  }
5535
- c = nextc();
6186
+ pushback(c);
6187
+ return '+';
6188
+ }
6189
+ if(c == '=') {
6190
+ set_yylval_id('+');
6191
+ SET_LEX_STATE(EXPR_BEG);
6192
+ return tOP_ASGN;
6193
+ }
6194
+ if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous('+'))) {
6195
+ SET_LEX_STATE(EXPR_BEG);
6196
+ pushback(c);
6197
+ if(c != -1 && ISDIGIT(c)) {
6198
+ return parse_numeric(parser_state, '+');
6199
+ }
6200
+ return tUPLUS;
5536
6201
  }
6202
+ SET_LEX_STATE(EXPR_BEG);
6203
+ pushback(c);
6204
+ warn_balanced("+", "unary operator");
6205
+ return '+';
5537
6206
 
5538
- decode_num:
6207
+ case '-':
6208
+ c = nextc();
6209
+ if(IS_AFTER_OPERATOR()) {
6210
+ SET_LEX_STATE(EXPR_ARG);
6211
+ if(c == '@') {
6212
+ return tUMINUS;
6213
+ }
6214
+ pushback(c);
6215
+ return '-';
6216
+ }
6217
+ if(c == '=') {
6218
+ set_yylval_id('-');
6219
+ SET_LEX_STATE(EXPR_BEG);
6220
+ return tOP_ASGN;
6221
+ }
6222
+ if(c == '>') {
6223
+ SET_LEX_STATE(EXPR_ENDFN);
6224
+ return tLAMBDA;
6225
+ }
6226
+ if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous('-'))) {
6227
+ SET_LEX_STATE(EXPR_BEG);
6228
+ pushback(c);
6229
+ if(c != -1 && ISDIGIT(c)) {
6230
+ return tUMINUS_NUM;
6231
+ }
6232
+ return tUMINUS;
6233
+ }
6234
+ SET_LEX_STATE(EXPR_BEG);
5539
6235
  pushback(c);
5540
- if(nondigit) {
5541
- char tmp[30];
5542
- trailing_uc:
5543
- snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit);
5544
- yy_error(tmp);
5545
- }
5546
- tokfix();
5547
- if(is_float) {
5548
- int type = tFLOAT;
5549
- VALUE v;
5550
-
5551
- suffix = number_literal_suffix(seen_e ? NUM_SUFFIX_I : NUM_SUFFIX_ALL);
5552
- if(suffix & NUM_SUFFIX_R) {
5553
- char *point = &tok()[seen_point];
5554
- size_t fraclen = toklen()-seen_point-1;
5555
- type = tRATIONAL;
5556
- memmove(point, point+1, fraclen+1);
5557
- v = rb_cstr_to_inum(tok(), 10, FALSE);
5558
- v = rb_rational_new(v,
5559
- rb_funcall(INT2FIX(10), rb_intern("**"), 1, INT2NUM(fraclen)));
5560
- } else {
5561
- double d = strtod(tok(), 0);
5562
- if(errno == ERANGE) {
5563
- rb_warningS("Float %s out of range", tok());
5564
- errno = 0;
5565
- }
5566
- v = rb_float_new(d);
6236
+ warn_balanced("-", "unary operator");
6237
+ return '-';
6238
+
6239
+ case '.':
6240
+ SET_LEX_STATE(EXPR_BEG);
6241
+ if((c = nextc()) == '.') {
6242
+ if((c = nextc()) == '.') {
6243
+ return tDOT3;
5567
6244
  }
5568
- return set_number_literal(v, type, suffix);
6245
+ pushback(c);
6246
+ return tDOT2;
5569
6247
  }
5570
- suffix = number_literal_suffix(NUM_SUFFIX_ALL);
5571
- return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
5572
- }
6248
+ pushback(c);
6249
+ if(c != -1 && ISDIGIT(c)) {
6250
+ yy_error("no .<digit> floating literal anymore; put 0 before dot");
6251
+ }
6252
+ SET_LEX_STATE(EXPR_DOT);
6253
+ return '.';
6254
+
6255
+ case '0': case '1': case '2': case '3': case '4':
6256
+ case '5': case '6': case '7': case '8': case '9':
6257
+ return parse_numeric(parser_state, c);
5573
6258
 
5574
6259
  case ')':
5575
6260
  case ']':
@@ -5578,12 +6263,12 @@ retry:
5578
6263
  COND_LEXPOP();
5579
6264
  CMDARG_LEXPOP();
5580
6265
  if(c == ')') {
5581
- lex_state = EXPR_ENDFN;
6266
+ SET_LEX_STATE(EXPR_ENDFN);
5582
6267
  } else {
5583
- lex_state = EXPR_ENDARG;
6268
+ SET_LEX_STATE(EXPR_ENDARG);
5584
6269
  }
5585
6270
  if(c == '}') {
5586
- if (!brace_nest--) c = tSTRING_DEND;
6271
+ if(!brace_nest--) c = tSTRING_DEND;
5587
6272
  }
5588
6273
  return c;
5589
6274
 
@@ -5591,16 +6276,16 @@ retry:
5591
6276
  c = nextc();
5592
6277
  if(c == ':') {
5593
6278
  if(IS_BEG() || lex_state_p(EXPR_CLASS) || IS_SPCARG(-1)) {
5594
- lex_state = EXPR_BEG;
6279
+ SET_LEX_STATE(EXPR_BEG);
5595
6280
  return tCOLON3;
5596
6281
  }
5597
- lex_state = EXPR_DOT;
6282
+ SET_LEX_STATE(EXPR_DOT);
5598
6283
  return tCOLON2;
5599
6284
  }
5600
- if(IS_END() || ISSPACE(c)) {
6285
+ if(IS_END() || ISSPACE(c) || c == '#') {
5601
6286
  pushback(c);
5602
6287
  warn_balanced(":", "symbol literal");
5603
- lex_state = EXPR_BEG;
6288
+ SET_LEX_STATE(EXPR_BEG);
5604
6289
  return ':';
5605
6290
  }
5606
6291
  switch(c) {
@@ -5614,46 +6299,46 @@ retry:
5614
6299
  pushback(c);
5615
6300
  break;
5616
6301
  }
5617
- lex_state = EXPR_FNAME;
6302
+ SET_LEX_STATE(EXPR_FNAME);
5618
6303
  return tSYMBEG;
5619
6304
 
5620
6305
  case '/':
5621
- if(lex_state_p(EXPR_BEG_ANY)) {
6306
+ if(IS_BEG()) {
5622
6307
  lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
5623
6308
  return tREGEXP_BEG;
5624
6309
  }
5625
6310
  if((c = nextc()) == '=') {
5626
6311
  set_yylval_id('/');
5627
- lex_state = EXPR_BEG;
6312
+ SET_LEX_STATE(EXPR_BEG);
5628
6313
  return tOP_ASGN;
5629
6314
  }
5630
6315
  pushback(c);
5631
6316
  if(IS_SPCARG(c)) {
5632
- (void)arg_ambiguous();
6317
+ (void)arg_ambiguous('/');
5633
6318
  lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
5634
6319
  return tREGEXP_BEG;
5635
6320
  }
5636
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
6321
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
5637
6322
  warn_balanced("/", "regexp literal");
5638
6323
  return '/';
5639
6324
 
5640
6325
  case '^':
5641
6326
  if((c = nextc()) == '=') {
5642
6327
  set_yylval_id('^');
5643
- lex_state = EXPR_BEG;
6328
+ SET_LEX_STATE(EXPR_BEG);
5644
6329
  return tOP_ASGN;
5645
6330
  }
5646
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
6331
+ SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
5647
6332
  pushback(c);
5648
6333
  return '^';
5649
6334
 
5650
6335
  case ';':
5651
- lex_state = EXPR_BEG;
6336
+ SET_LEX_STATE(EXPR_BEG);
5652
6337
  command_start = TRUE;
5653
6338
  return ';';
5654
6339
 
5655
6340
  case ',':
5656
- lex_state = EXPR_BEG;
6341
+ SET_LEX_STATE(EXPR_BEG | EXPR_LABEL);
5657
6342
  return ',';
5658
6343
 
5659
6344
  case '~':
@@ -5661,9 +6346,9 @@ retry:
5661
6346
  if((c = nextc()) != '@') {
5662
6347
  pushback(c);
5663
6348
  }
5664
- lex_state = EXPR_ARG;
6349
+ SET_LEX_STATE(EXPR_ARG);
5665
6350
  } else {
5666
- lex_state = EXPR_BEG;
6351
+ SET_LEX_STATE(EXPR_BEG);
5667
6352
  }
5668
6353
  return '~';
5669
6354
 
@@ -5676,13 +6361,13 @@ retry:
5676
6361
  paren_nest++;
5677
6362
  COND_PUSH(0);
5678
6363
  CMDARG_PUSH(0);
5679
- lex_state = EXPR_BEG;
6364
+ SET_LEX_STATE(EXPR_BEG | EXPR_LABEL);
5680
6365
  return c;
5681
6366
 
5682
6367
  case '[':
5683
6368
  paren_nest++;
5684
6369
  if(IS_AFTER_OPERATOR()) {
5685
- lex_state = EXPR_ARG;
6370
+ SET_LEX_STATE(EXPR_ARG);
5686
6371
  if((c = nextc()) == ']') {
5687
6372
  if((c = nextc()) == '=') {
5688
6373
  return tASET;
@@ -5691,13 +6376,14 @@ retry:
5691
6376
  return tAREF;
5692
6377
  }
5693
6378
  pushback(c);
6379
+ SET_LEX_STATE(lex_state | EXPR_LABEL);
5694
6380
  return '[';
5695
6381
  } else if(IS_BEG()) {
5696
6382
  c = tLBRACK;
5697
- } else if(IS_ARG() && space_seen) {
6383
+ } else if(IS_ARG() && (space_seen || lex_state_p(EXPR_LABELED))) {
5698
6384
  c = tLBRACK;
5699
6385
  }
5700
- lex_state = EXPR_BEG;
6386
+ SET_LEX_STATE(EXPR_BEG|EXPR_LABEL);
5701
6387
  COND_PUSH(0);
5702
6388
  CMDARG_PUSH(0);
5703
6389
  return c;
@@ -5705,14 +6391,16 @@ retry:
5705
6391
  case '{':
5706
6392
  ++brace_nest;
5707
6393
  if(lpar_beg && lpar_beg == paren_nest) {
5708
- lex_state = EXPR_BEG;
6394
+ SET_LEX_STATE(EXPR_BEG);
5709
6395
  lpar_beg = 0;
5710
6396
  --paren_nest;
5711
6397
  COND_PUSH(0);
5712
6398
  CMDARG_PUSH(0);
5713
6399
  return tLAMBEG;
5714
6400
  }
5715
- if(IS_ARG() || lex_state_p(EXPR_END | EXPR_ENDFN)) {
6401
+ if(lex_state_p(EXPR_LABELED)) {
6402
+ c = tLBRACE; /* hash */
6403
+ } else if(lex_state_p(EXPR_ARG_ANY | EXPR_END | EXPR_ENDFN)) {
5716
6404
  c = '{'; /* block (primary) */
5717
6405
  } else if(lex_state_p(EXPR_ENDARG)) {
5718
6406
  c = tLBRACE_ARG; /* block (expr) */
@@ -5721,7 +6409,8 @@ retry:
5721
6409
  }
5722
6410
  COND_PUSH(0);
5723
6411
  CMDARG_PUSH(0);
5724
- lex_state = EXPR_BEG;
6412
+ SET_LEX_STATE(EXPR_BEG);
6413
+ if(c != tLBRACE_ARG) SET_LEX_STATE(lex_state | EXPR_LABEL);
5725
6414
  if(c != tLBRACE) command_start = TRUE;
5726
6415
  return c;
5727
6416
 
@@ -5735,206 +6424,13 @@ retry:
5735
6424
  return '\\';
5736
6425
 
5737
6426
  case '%':
5738
- if(lex_state_p(EXPR_BEG_ANY)) {
5739
- intptr_t term;
5740
- intptr_t paren;
5741
-
5742
- c = nextc();
5743
- quotation:
5744
- if(c == -1 || !ISALNUM(c)) {
5745
- term = c;
5746
- c = 'Q';
5747
- } else {
5748
- term = nextc();
5749
- if(parser_enc_isalnum((int)term, parser_state->enc) || !parser_isascii()) {
5750
- yy_error("unknown type of % string");
5751
- return 0;
5752
- }
5753
- }
5754
- if(c == -1 || term == -1) {
5755
- rb_compile_error(parser_state, "unterminated quoted string meets end of file");
5756
- return 0;
5757
- }
5758
- paren = term;
5759
- if(term == '(') term = ')';
5760
- else if(term == '[') term = ']';
5761
- else if(term == '{') term = '}';
5762
- else if(term == '<') term = '>';
5763
- else paren = 0;
5764
-
5765
- switch(c) {
5766
- case 'Q':
5767
- lex_strterm = NEW_STRTERM(str_dquote, term, paren);
5768
- return tSTRING_BEG;
5769
-
5770
- case 'q':
5771
- lex_strterm = NEW_STRTERM(str_squote, term, paren);
5772
- return tSTRING_BEG;
5773
-
5774
- case 'W':
5775
- lex_strterm = NEW_STRTERM(str_dword, term, paren);
5776
- do {c = nextc();} while(ISSPACE(c));
5777
- pushback(c);
5778
- return tWORDS_BEG;
5779
-
5780
- case 'w':
5781
- lex_strterm = NEW_STRTERM(str_sword, term, paren);
5782
- do {c = nextc();} while(ISSPACE(c));
5783
- pushback(c);
5784
- return tQWORDS_BEG;
5785
-
5786
- case 'I':
5787
- lex_strterm = NEW_STRTERM(str_dword, term, paren);
5788
- do {c = nextc();} while (ISSPACE(c));
5789
- pushback(c);
5790
- return tSYMBOLS_BEG;
5791
-
5792
- case 'i':
5793
- lex_strterm = NEW_STRTERM(str_sword, term, paren);
5794
- do {c = nextc();} while (ISSPACE(c));
5795
- pushback(c);
5796
- return tQSYMBOLS_BEG;
5797
-
5798
- case 'x':
5799
- lex_strterm = NEW_STRTERM(str_xquote, term, paren);
5800
- return tXSTRING_BEG;
5801
-
5802
- case 'r':
5803
- lex_strterm = NEW_STRTERM(str_regexp, term, paren);
5804
- return tREGEXP_BEG;
5805
-
5806
- case 's':
5807
- lex_strterm = NEW_STRTERM(str_ssym, term, paren);
5808
- lex_state = EXPR_FNAME;
5809
- return tSYMBEG;
5810
-
5811
- default:
5812
- yy_error("unknown type of % string");
5813
- return 0;
5814
- }
5815
- }
5816
- if((c = nextc()) == '=') {
5817
- set_yylval_id('%');
5818
- lex_state = EXPR_BEG;
5819
- return tOP_ASGN;
5820
- }
5821
- if(IS_SPCARG(c)) {
5822
- goto quotation;
5823
- }
5824
- lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
5825
- pushback(c);
5826
- warn_balanced("%%", "string literal");
5827
- return '%';
6427
+ return parse_percent(parser_state, space_seen, last_state);
5828
6428
 
5829
6429
  case '$':
5830
- lex_state = EXPR_END;
5831
- newtok();
5832
- c = nextc();
5833
- switch(c) {
5834
- case '_': /* $_: last read line string */
5835
- c = nextc();
5836
- if(parser_is_identchar()) {
5837
- tokadd('$');
5838
- tokadd('_');
5839
- break;
5840
- }
5841
- pushback(c);
5842
- c = '_';
5843
- /* fall through */
5844
- case '~': /* $~: match-data */
5845
- case '*': /* $*: argv */
5846
- case '$': /* $$: pid */
5847
- case '?': /* $?: last status */
5848
- case '!': /* $!: error string */
5849
- case '@': /* $@: error position */
5850
- case '/': /* $/: input record separator */
5851
- case '\\': /* $\: output record separator */
5852
- case ';': /* $;: field separator */
5853
- case ',': /* $,: output field separator */
5854
- case '.': /* $.: last read line number */
5855
- case '=': /* $=: ignorecase */
5856
- case ':': /* $:: load path */
5857
- case '<': /* $<: reading filename */
5858
- case '>': /* $>: default output handle */
5859
- case '\"': /* $": already loaded files */
5860
- tokadd('$');
5861
- tokadd(c);
5862
- goto gvar;
5863
-
5864
- case '-':
5865
- tokadd('$');
5866
- tokadd(c);
5867
- c = nextc();
5868
- if(parser_is_identchar()) {
5869
- if(tokadd_mbchar(c) == -1) return 0;
5870
- } else {
5871
- pushback(c);
5872
- pushback('-');
5873
- return '$';
5874
- }
5875
- gvar:
5876
- tokfix();
5877
- // TODO rb_intern3(tok(), tokidx, current_enc);
5878
- set_yylval_name(parser_intern(tok()));
5879
- return tGVAR;
5880
-
5881
- case '&': /* $&: last match */
5882
- case '`': /* $`: string before last match */
5883
- case '\'': /* $': string after last match */
5884
- case '+': /* $+: string matches last paren. */
5885
- if(lex_state_of_p(last_state, EXPR_FNAME)) {
5886
- tokadd('$');
5887
- tokadd(c);
5888
- goto gvar;
5889
- }
5890
- set_yylval_node(NEW_BACK_REF(c));
5891
- return tBACK_REF;
5892
-
5893
- case '1': case '2': case '3':
5894
- case '4': case '5': case '6':
5895
- case '7': case '8': case '9':
5896
- tokadd('$');
5897
- do {
5898
- tokadd(c);
5899
- c = nextc();
5900
- } while(c != -1 && ISDIGIT(c));
5901
- pushback(c);
5902
- if(lex_state_of_p(last_state, EXPR_FNAME)) goto gvar;
5903
- tokfix();
5904
- set_yylval_node(NEW_NTH_REF(atoi(tok()+1)));
5905
- return tNTH_REF;
5906
-
5907
- default:
5908
- if(!parser_is_identchar()) {
5909
- pushback(c);
5910
- rb_compile_error(parser_state,
5911
- "`$%c' is not allowed as a global variable name", c);
5912
- return 0;
5913
- }
5914
- case '0':
5915
- tokadd('$');
5916
- }
5917
- break;
6430
+ return parse_gvar(parser_state, last_state);
5918
6431
 
5919
6432
  case '@':
5920
- c = nextc();
5921
- newtok();
5922
- tokadd('@');
5923
- if(c == '@') {
5924
- tokadd('@');
5925
- c = nextc();
5926
- }
5927
- if(c != -1 && (ISDIGIT(c) || !parser_is_identchar())) {
5928
- if(tokidx == 1) {
5929
- rb_compile_error(parser_state,
5930
- "`@%c' is not allowed as an instance variable name", c);
5931
- } else {
5932
- rb_compile_error(parser_state,
5933
- "`@@%c' is not allowed as a class variable name", c);
5934
- }
5935
- return 0;
5936
- }
5937
- break;
6433
+ return parse_atmark(parser_state, last_state);
5938
6434
 
5939
6435
  case '_':
5940
6436
  if(was_bol() && whole_match_p("__END__", 7, 0)) {
@@ -5955,132 +6451,7 @@ retry:
5955
6451
  break;
5956
6452
  }
5957
6453
 
5958
- mb = ENC_CODERANGE_7BIT;
5959
- do {
5960
- if(!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
5961
- if(tokadd_mbchar(c) == -1) return 0;
5962
- c = nextc();
5963
- } while(parser_is_identchar());
5964
- switch(tok()[0]) {
5965
- case '@': case '$':
5966
- pushback(c);
5967
- break;
5968
- default:
5969
- if((c == '!' || c == '?') && !peek('=')) {
5970
- tokadd(c);
5971
- } else {
5972
- pushback(c);
5973
- }
5974
- }
5975
- tokfix();
5976
- {
5977
- int result = 0;
5978
-
5979
- last_state = lex_state;
5980
- switch(tok()[0]) {
5981
- case '$':
5982
- lex_state = EXPR_END;
5983
- result = tGVAR;
5984
- break;
5985
- case '@':
5986
- lex_state = EXPR_END;
5987
- if(tok()[1] == '@') {
5988
- result = tCVAR;
5989
- } else {
5990
- result = tIVAR;
5991
- }
5992
- break;
5993
- default:
5994
- if(toklast() == '!' || toklast() == '?') {
5995
- result = tFID;
5996
- } else {
5997
- if(lex_state_p(EXPR_FNAME)) {
5998
- if((c = nextc()) == '=' && !peek('~') && !peek('>') &&
5999
- (!peek('=') || (peek_n('>', 1)))) {
6000
- result = tIDENTIFIER;
6001
- tokadd(c);
6002
- tokfix();
6003
- } else {
6004
- pushback(c);
6005
- }
6006
- }
6007
- if(result == 0 && ISUPPER(tok()[0])) {
6008
- result = tCONSTANT;
6009
- } else {
6010
- result = tIDENTIFIER;
6011
- }
6012
- }
6013
-
6014
- if(IS_LABEL_POSSIBLE()) {
6015
- if(IS_LABEL_SUFFIX(0)) {
6016
- lex_state = EXPR_BEG;
6017
- nextc();
6018
- set_yylval_name(TOK_INTERN(!ENC_SINGLE(mb)));
6019
- return tLABEL;
6020
- }
6021
- }
6022
- if(mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
6023
- const struct kwtable *kw;
6024
-
6025
- /* See if it is a reserved word. */
6026
- kw = reserved_word(tok(), toklen());
6027
- if(kw) {
6028
- enum lex_state_e state = lex_state;
6029
- lex_state = kw->state;
6030
- if(lex_state_of_p(state, EXPR_FNAME)) {
6031
- set_yylval_name(parser_intern(kw->name));
6032
- return kw->id[0];
6033
- }
6034
- if(lex_state_p(EXPR_BEG)) {
6035
- command_start = TRUE;
6036
- }
6037
- if(kw->id[0] == keyword_do) {
6038
- if(lpar_beg && lpar_beg == paren_nest) {
6039
- lpar_beg = 0;
6040
- --paren_nest;
6041
- return keyword_do_LAMBDA;
6042
- }
6043
- if(COND_P()) return keyword_do_cond;
6044
- if(CMDARG_P() && !lex_state_of_p(state, EXPR_CMDARG))
6045
- return keyword_do_block;
6046
- if(lex_state_of_p(state, EXPR_BEG | EXPR_ENDARG))
6047
- return keyword_do_block;
6048
- return keyword_do;
6049
- }
6050
- if(lex_state_of_p(state, EXPR_BEG | EXPR_VALUE))
6051
- return kw->id[0];
6052
- else {
6053
- if(kw->id[0] != kw->id[1])
6054
- lex_state = EXPR_BEG;
6055
- return kw->id[1];
6056
- }
6057
- }
6058
- }
6059
-
6060
- if(lex_state_p(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) {
6061
- if(cmd_state) {
6062
- lex_state = EXPR_CMDARG;
6063
- } else {
6064
- lex_state = EXPR_ARG;
6065
- }
6066
- } else if(lex_state == EXPR_FNAME) {
6067
- lex_state = EXPR_ENDFN;
6068
- } else {
6069
- lex_state = EXPR_END;
6070
- }
6071
- }
6072
- {
6073
- ID ident = TOK_INTERN(!ENC_SINGLE(mb));
6074
-
6075
- set_yylval_name(ident);
6076
- if(!lex_state_of_p(last_state, EXPR_DOT | EXPR_FNAME) &&
6077
- is_local_id(ident) && lvar_defined(ident)) {
6078
- lex_state = EXPR_END;
6079
- }
6080
- }
6081
-
6082
- return result;
6083
- }
6454
+ return parse_ident(parser_state, c, cmd_state);
6084
6455
  }
6085
6456
 
6086
6457
  #if YYPURE
@@ -6378,7 +6749,7 @@ parser_literal_concat(rb_parser_state* parser_state, NODE *head, NODE *tail)
6378
6749
  && (headlast = head->nd_next->nd_end->nd_head)
6379
6750
  && nd_type(headlast) == NODE_STR) {
6380
6751
  lit = headlast->nd_lit;
6381
- if (!literal_concat0(lit, tail->nd_lit))
6752
+ if(!literal_concat0(lit, tail->nd_lit))
6382
6753
  goto error;
6383
6754
  tail->nd_lit = Qnil;
6384
6755
  goto append;
@@ -6442,25 +6813,25 @@ static const struct {
6442
6813
  ID token;
6443
6814
  const char *name;
6444
6815
  } op_tbl[] = {
6445
- {tDOT2, ".."},
6446
- {tDOT3, "..."},
6447
- {tPOW, "**"},
6816
+ {tDOT2, ".."},
6817
+ {tDOT3, "..."},
6818
+ {tPOW, "**"},
6448
6819
  {tDSTAR, "**"},
6449
- {tUPLUS, "+@"},
6450
- {tUMINUS, "-@"},
6451
- {tCMP, "<=>"},
6452
- {tGEQ, ">="},
6453
- {tLEQ, "<="},
6454
- {tEQ, "=="},
6455
- {tEQQ, "==="},
6456
- {tNEQ, "!="},
6457
- {tMATCH, "=~"},
6458
- {tNMATCH, "!~"},
6459
- {tAREF, "[]"},
6460
- {tASET, "[]="},
6461
- {tLSHFT, "<<"},
6462
- {tRSHFT, ">>"},
6463
- {tCOLON2, "::"},
6820
+ {tUPLUS, "+@"},
6821
+ {tUMINUS, "-@"},
6822
+ {tCMP, "<=>"},
6823
+ {tGEQ, ">="},
6824
+ {tLEQ, "<="},
6825
+ {tEQ, "=="},
6826
+ {tEQQ, "==="},
6827
+ {tNEQ, "!="},
6828
+ {tMATCH, "=~"},
6829
+ {tNMATCH, "!~"},
6830
+ {tAREF, "[]"},
6831
+ {tASET, "[]="},
6832
+ {tLSHFT, "<<"},
6833
+ {tRSHFT, ">>"},
6834
+ {tCOLON2, "::"},
6464
6835
 
6465
6836
  // Added for Rubinius
6466
6837
  {'!', "!"},
@@ -6715,7 +7086,7 @@ parser_block_dup_check(rb_parser_state* parser_state, NODE *node1, NODE *node2)
6715
7086
  static const char id_type_names[][9] = {
6716
7087
  "LOCAL",
6717
7088
  "INSTANCE",
6718
- "", /* INSTANCE2 */
7089
+ "", /* INSTANCE2 */
6719
7090
  "GLOBAL",
6720
7091
  "ATTRSET",
6721
7092
  "CONST",
@@ -6727,10 +7098,10 @@ static ID
6727
7098
  rb_id_attrset(ID id)
6728
7099
  {
6729
7100
  if(!is_notop_id(id)) {
6730
- switch (id) {
7101
+ switch(id) {
6731
7102
  case tAREF:
6732
7103
  case tASET:
6733
- return tASET; /* only exception */
7104
+ return tASET; /* only exception */
6734
7105
  }
6735
7106
  rb_name_error(id, "cannot make operator ID :%s attrset", rb_id2name(id));
6736
7107
  } else {
@@ -6756,12 +7127,17 @@ rb_id_attrset(ID id)
6756
7127
  }
6757
7128
 
6758
7129
  static NODE *
6759
- parser_attrset(rb_parser_state* parser_state, NODE *recv, ID id)
7130
+ parser_attrset(rb_parser_state* parser_state, NODE *recv, ID atype, ID id)
6760
7131
  {
6761
7132
  if(recv && nd_type(recv) == NODE_SELF) {
6762
7133
  recv = (NODE *)1;
6763
7134
  }
6764
- return NEW_ATTRASGN(recv, rb_id_attrset(id), 0);
7135
+
7136
+ if(CALL_Q_P(atype)) {
7137
+ return NEW_ANDATTRASGN(recv, rb_id_attrset(id), 0);
7138
+ } else {
7139
+ return NEW_ATTRASGN(recv, rb_id_attrset(id), 0);
7140
+ }
6765
7141
  }
6766
7142
 
6767
7143
  static void
@@ -6827,6 +7203,7 @@ parser_node_assign(rb_parser_state* parser_state, NODE *lhs, NODE *rhs)
6827
7203
  lhs->nd_value = rhs;
6828
7204
  break;
6829
7205
 
7206
+ case NODE_ANDATTRASGN:
6830
7207
  case NODE_ATTRASGN:
6831
7208
  case NODE_CALL:
6832
7209
  lhs->nd_args = arg_append(lhs->nd_args, rhs);
@@ -6869,8 +7246,8 @@ parser_new_op_assign(rb_parser_state* parser_state, NODE *lhs, ID op, NODE *rhs)
6869
7246
  }
6870
7247
 
6871
7248
  static NODE*
6872
- parser_new_attr_op_assign(rb_parser_state* parser_state,
6873
- NODE *lhs, ID attr, ID op, NODE *rhs)
7249
+ parser_new_attr_op_assign(rb_parser_state* parser_state, NODE *lhs,
7250
+ ID atype, ID attr, ID op, NODE *rhs)
6874
7251
  {
6875
7252
  NODE *asgn;
6876
7253
 
@@ -6881,7 +7258,7 @@ parser_new_attr_op_assign(rb_parser_state* parser_state,
6881
7258
  } else {
6882
7259
  op = convert_op(op);
6883
7260
  }
6884
- asgn = NEW_OP_ASGN2(lhs, attr, op, rhs);
7261
+ asgn = NEW_OP_ASGN2(lhs, CALL_Q_P(atype), attr, op, rhs);
6885
7262
  fixpos(asgn, lhs);
6886
7263
 
6887
7264
  return asgn;
@@ -7596,7 +7973,7 @@ scan_hex(const char *start, size_t len, size_t *retlen)
7596
7973
  }
7597
7974
 
7598
7975
  static ID
7599
- parser_internal_id(rb_parser_state *parser_state)
7976
+ parser_internal_id(rb_parser_state* parser_state)
7600
7977
  {
7601
7978
  ID id = (ID)vtable_size(locals_table->args) + (ID)vtable_size(locals_table->vars);
7602
7979
  id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1;