rubinius-melbourne 3.6 → 3.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/rubinius/code/melbourne/grammar.cpp +5201 -4803
- data/ext/rubinius/code/melbourne/grammar.y +1502 -1125
- data/ext/rubinius/code/melbourne/melbourne.hpp +8 -0
- data/ext/rubinius/code/melbourne/node.hpp +6 -2
- data/ext/rubinius/code/melbourne/node_types.cpp +65 -61
- data/ext/rubinius/code/melbourne/node_types.hpp +2 -0
- data/ext/rubinius/code/melbourne/node_types.rb +2 -0
- data/ext/rubinius/code/melbourne/parser_state.hpp +25 -6
- data/ext/rubinius/code/melbourne/symbols.cpp +4 -0
- data/ext/rubinius/code/melbourne/symbols.hpp +2 -0
- data/ext/rubinius/code/melbourne/visitor.cpp +27 -2
- data/lib/rubinius/code/melbourne/version.rb +1 -1
- metadata +11 -11
@@ -32,6 +32,8 @@ namespace MELBOURNE {
|
|
32
32
|
#define TRUE true
|
33
33
|
#define FALSE false
|
34
34
|
|
35
|
+
#define TAB_WIDTH 8
|
36
|
+
|
35
37
|
#define numberof(array) (int)(sizeof(array) / sizeof((array)[0]))
|
36
38
|
|
37
39
|
static void parser_prepare(rb_parser_state*);
|
@@ -54,6 +56,8 @@ static int parser_yyerror(rb_parser_state*, const char *);
|
|
54
56
|
((id)&ID_SCOPE_MASK) == ID_INSTANCE || \
|
55
57
|
((id)&ID_SCOPE_MASK) == ID_CLASS))
|
56
58
|
|
59
|
+
# define SET_LEX_STATE(ls) (lex_state = (lex_state_e)(ls))
|
60
|
+
|
57
61
|
static int yylex(void*, void *);
|
58
62
|
|
59
63
|
#define BITSTACK_PUSH(stack, n) ((stack) = ((stack)<<1)|((n)&1))
|
@@ -71,6 +75,9 @@ static int yylex(void*, void *);
|
|
71
75
|
#define CMDARG_LEXPOP() BITSTACK_LEXPOP(cmdarg_stack)
|
72
76
|
#define CMDARG_P() BITSTACK_SET_P(cmdarg_stack)
|
73
77
|
|
78
|
+
static int parser_arg_ambiguous(rb_parser_state*, char);
|
79
|
+
#define arg_ambiguous(c) parser_arg_ambiguous(parser_state, c)
|
80
|
+
|
74
81
|
static void parser_token_info_push(rb_parser_state*, const char *);
|
75
82
|
static void parser_token_info_pop(rb_parser_state*, const char *);
|
76
83
|
#define token_info_push(token) (RTEST(ruby_verbose) \
|
@@ -126,11 +133,11 @@ static bool parser_in_block(rb_parser_state*);
|
|
126
133
|
static bool parser_bv_defined(rb_parser_state*, ID);
|
127
134
|
static int parser_bv_var(rb_parser_state*, ID);
|
128
135
|
static NODE *parser_aryset(rb_parser_state*, NODE*, NODE*);
|
129
|
-
static NODE *parser_attrset(rb_parser_state*, NODE*, ID);
|
136
|
+
static NODE *parser_attrset(rb_parser_state*, NODE*, ID, ID);
|
130
137
|
static void rb_parser_backref_error(rb_parser_state*, NODE*);
|
131
138
|
static NODE *parser_node_assign(rb_parser_state*, NODE*, NODE*);
|
132
139
|
static NODE *parser_new_op_assign(rb_parser_state*, NODE*, ID, NODE*);
|
133
|
-
static NODE *parser_new_attr_op_assign(rb_parser_state*, NODE*, ID, ID, NODE*);
|
140
|
+
static NODE *parser_new_attr_op_assign(rb_parser_state*, NODE*, ID, ID, ID, NODE*);
|
134
141
|
static NODE *parser_new_const_op_assign(rb_parser_state*, NODE*, ID, NODE*);
|
135
142
|
|
136
143
|
static NODE *parser_match_op(rb_parser_state*, NODE*, NODE*);
|
@@ -144,6 +151,9 @@ static bool parser_local_id(rb_parser_state*, ID);
|
|
144
151
|
static ID* parser_local_tbl(rb_parser_state*);
|
145
152
|
static ID convert_op(ID id);
|
146
153
|
|
154
|
+
static void parser_heredoc_dedent(rb_parser_state*, NODE*);
|
155
|
+
#define heredoc_dedent(str) parser_heredoc_dedent(parser_state, (str))
|
156
|
+
|
147
157
|
rb_parser_state *parser_alloc_state() {
|
148
158
|
rb_parser_state *parser_state = (rb_parser_state*)calloc(1, sizeof(rb_parser_state));
|
149
159
|
|
@@ -165,6 +175,9 @@ rb_parser_state *parser_alloc_state() {
|
|
165
175
|
brace_nest = 0;
|
166
176
|
compile_for_eval = 0;
|
167
177
|
cur_mid = 0;
|
178
|
+
heredoc_end = 0;
|
179
|
+
heredoc_indent = 0;
|
180
|
+
heredoc_line_indent = 0;
|
168
181
|
tokenbuf = NULL;
|
169
182
|
tokidx = 0;
|
170
183
|
toksiz = 0;
|
@@ -187,7 +200,7 @@ rb_parser_state *parser_alloc_state() {
|
|
187
200
|
return parser_state;
|
188
201
|
}
|
189
202
|
|
190
|
-
void *pt_allocate(rb_parser_state *parser_state, int size) {
|
203
|
+
void *pt_allocate(rb_parser_state* parser_state, int size) {
|
191
204
|
void *cur;
|
192
205
|
|
193
206
|
if(!memory_cur || ((memory_cur + size) >= memory_last_addr)) {
|
@@ -212,7 +225,7 @@ void *pt_allocate(rb_parser_state *parser_state, int size) {
|
|
212
225
|
return cur;
|
213
226
|
}
|
214
227
|
|
215
|
-
void pt_free(rb_parser_state
|
228
|
+
void pt_free(rb_parser_state* parser_state) {
|
216
229
|
int i;
|
217
230
|
|
218
231
|
free(tokenbuf);
|
@@ -311,7 +324,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
311
324
|
#define list_append(l, i) parser_list_append(parser_state, l, i)
|
312
325
|
#define node_assign(a, b) parser_node_assign(parser_state, a, b)
|
313
326
|
#define new_op_assign(l, o, r) parser_new_op_assign(parser_state, l, o, r)
|
314
|
-
#define new_attr_op_assign(l,a,o,r) parser_new_attr_op_assign(parser_state, l, a, o, r)
|
327
|
+
#define new_attr_op_assign(l,t,a,o,r) parser_new_attr_op_assign(parser_state, l, t, a, o, r)
|
315
328
|
#define new_const_op_assign(l,o,r) parser_new_const_op_assign(parser_state, l, o, r)
|
316
329
|
#define call_bin_op(a, s, b) parser_call_bin_op(parser_state, a, s, b)
|
317
330
|
#define call_uni_op(n, s) parser_call_uni_op(parser_state, n, s)
|
@@ -330,7 +343,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
330
343
|
#define bv_defined(n) parser_bv_defined(parser_state, n)
|
331
344
|
#define bv_var(n) parser_bv_var(parser_state, n)
|
332
345
|
#define aryset(a, b) parser_aryset(parser_state, a, b)
|
333
|
-
#define attrset(
|
346
|
+
#define attrset(n, q, id) parser_attrset(parser_state, n, q, id)
|
334
347
|
#define match_op(a, b) parser_match_op(parser_state, a, b)
|
335
348
|
#define new_yield(n) parser_new_yield(parser_state, n)
|
336
349
|
#define dsym_node(n) parser_dsym_node(parser_state, n)
|
@@ -406,7 +419,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
406
419
|
#define STR_NEW3(p,n,e,func) parser_str_new(parser_state, (p), (n), (e), \
|
407
420
|
(func), parser_state->enc)
|
408
421
|
#define ENC_SINGLE(cr) ((cr)==ENC_CODERANGE_7BIT)
|
409
|
-
#define TOK_INTERN(
|
422
|
+
#define TOK_INTERN() parser_intern3(tok(), toklen(), parser_state->enc)
|
410
423
|
|
411
424
|
#define NEW_BLOCK_VAR(b, v) NEW_NODE(NODE_BLOCK_PASS, 0, b, v)
|
412
425
|
#define NEW_REQ_KW NEW_LIT(ID2SYM(parser_intern("*")))
|
@@ -513,8 +526,9 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
513
526
|
%type <node> mlhs mlhs_head mlhs_basic mlhs_item mlhs_node mlhs_post mlhs_inner
|
514
527
|
%type <id> fsym keyword_variable user_variable sym symbol operation operation2 operation3
|
515
528
|
%type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg
|
516
|
-
%type <id> f_kwrest f_label
|
529
|
+
%type <id> f_kwrest f_label f_arg_asgn call_op call_op2
|
517
530
|
|
531
|
+
%token END_OF_INPUT 0 "end-of-input"
|
518
532
|
%token tUPLUS /* unary+ */
|
519
533
|
%token tUMINUS /* unary- */
|
520
534
|
%token tPOW /* ** */
|
@@ -524,11 +538,17 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
524
538
|
%token tNEQ /* != */
|
525
539
|
%token tGEQ /* >= */
|
526
540
|
%token tLEQ /* <= */
|
527
|
-
%token tANDOP
|
528
|
-
%token
|
529
|
-
%token
|
530
|
-
%token
|
531
|
-
%token
|
541
|
+
%token tANDOP /* && */
|
542
|
+
%token tOROP /* || */
|
543
|
+
%token tMATCH /* =~ */
|
544
|
+
%token tNMATCH /* !~ */
|
545
|
+
%token tDOT2 /* .. */
|
546
|
+
%token tDOT3 /* ... */
|
547
|
+
%token tAREF /* [] */
|
548
|
+
%token tASET /* []= */
|
549
|
+
%token tLSHFT /* << */
|
550
|
+
%token tRSHFT /* >> */
|
551
|
+
%token tANDDOT /* &. */
|
532
552
|
%token tCOLON2 /* :: */
|
533
553
|
%token tCOLON3 /* :: at EXPR_BEG */
|
534
554
|
%token <id> tOP_ASGN /* +=, -= etc. */
|
@@ -538,9 +558,9 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
538
558
|
%token tRPAREN /* ) */
|
539
559
|
%token tLBRACK /* [ */
|
540
560
|
%token tLBRACE /* { */
|
541
|
-
%token tLBRACE_ARG /* { */
|
561
|
+
%token tLBRACE_ARG /* { arg */
|
542
562
|
%token tSTAR /* * */
|
543
|
-
%token tDSTAR /* ** */
|
563
|
+
%token tDSTAR /* **arg */
|
544
564
|
%token tAMPER /* & */
|
545
565
|
%token tLAMBDA /* -> */
|
546
566
|
%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG tSYMBOLS_BEG tQSYMBOLS_BEG
|
@@ -578,7 +598,7 @@ static int scan_hex(const char *start, size_t len, size_t *retlen);
|
|
578
598
|
|
579
599
|
%%
|
580
600
|
program : {
|
581
|
-
|
601
|
+
SET_LEX_STATE(EXPR_BEG);
|
582
602
|
local_push(0);
|
583
603
|
class_nest = 0;
|
584
604
|
}
|
@@ -702,7 +722,7 @@ stmt_or_begin : stmt
|
|
702
722
|
}
|
703
723
|
;
|
704
724
|
|
705
|
-
stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem
|
725
|
+
stmt : keyword_alias fitem {SET_LEX_STATE(EXPR_FNAME | EXPR_FITEM);} fitem
|
706
726
|
{
|
707
727
|
$$ = NEW_ALIAS($2, $4);
|
708
728
|
}
|
@@ -795,15 +815,15 @@ stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem
|
|
795
815
|
$$ = NEW_OP_ASGN1($1, $5, args);
|
796
816
|
fixpos($$, $1);
|
797
817
|
}
|
798
|
-
| primary_value
|
818
|
+
| primary_value call_op tIDENTIFIER tOP_ASGN command_call
|
799
819
|
{
|
800
820
|
value_expr($5);
|
801
|
-
$$ = new_attr_op_assign($1, $3, $4, $5);
|
821
|
+
$$ = new_attr_op_assign($1, $2, $3, $4, $5);
|
802
822
|
}
|
803
|
-
| primary_value
|
823
|
+
| primary_value call_op tCONSTANT tOP_ASGN command_call
|
804
824
|
{
|
805
825
|
value_expr($5);
|
806
|
-
$$ = new_attr_op_assign($1, $3, $4, $5);
|
826
|
+
$$ = new_attr_op_assign($1, $2, $3, $4, $5);
|
807
827
|
}
|
808
828
|
| primary_value tCOLON2 tCONSTANT tOP_ASGN command_call
|
809
829
|
{
|
@@ -813,7 +833,7 @@ stmt : keyword_alias fitem {lex_state = EXPR_FNAME;} fitem
|
|
813
833
|
| primary_value tCOLON2 tIDENTIFIER tOP_ASGN command_call
|
814
834
|
{
|
815
835
|
value_expr($5);
|
816
|
-
$$ = new_attr_op_assign($1, $3, $4, $5);
|
836
|
+
$$ = new_attr_op_assign($1, parser_intern("::"), $3, $4, $5);
|
817
837
|
}
|
818
838
|
| backref tOP_ASGN command_call
|
819
839
|
{
|
@@ -878,9 +898,9 @@ command_call : command
|
|
878
898
|
;
|
879
899
|
|
880
900
|
block_command : block_call
|
881
|
-
| block_call
|
901
|
+
| block_call call_op2 operation2 command_args
|
882
902
|
{
|
883
|
-
$$ =
|
903
|
+
$$ = NEW_QCALL($2, $1, $3, $4);
|
884
904
|
}
|
885
905
|
;
|
886
906
|
|
@@ -919,15 +939,15 @@ command : fcall command_args %prec tLOWEST
|
|
919
939
|
$$ = $3;
|
920
940
|
fixpos($$, $1);
|
921
941
|
}
|
922
|
-
| primary_value
|
942
|
+
| primary_value call_op operation2 command_args %prec tLOWEST
|
923
943
|
{
|
924
|
-
$$ =
|
944
|
+
$$ = NEW_QCALL($2, $1, $3, $4);
|
925
945
|
fixpos($$, $1);
|
926
946
|
}
|
927
|
-
| primary_value
|
947
|
+
| primary_value call_op operation2 command_args cmd_brace_block
|
928
948
|
{
|
929
949
|
block_dup_check($4, $5);
|
930
|
-
$5->nd_iter =
|
950
|
+
$5->nd_iter = NEW_QCALL($2, $1, $3, $4);
|
931
951
|
$$ = $5;
|
932
952
|
fixpos($$, $1);
|
933
953
|
}
|
@@ -1062,17 +1082,17 @@ mlhs_node : user_variable
|
|
1062
1082
|
{
|
1063
1083
|
$$ = aryset($1, $3);
|
1064
1084
|
}
|
1065
|
-
| primary_value
|
1085
|
+
| primary_value call_op tIDENTIFIER
|
1066
1086
|
{
|
1067
|
-
$$ = attrset($1, $3);
|
1087
|
+
$$ = attrset($1, $2, $3);
|
1068
1088
|
}
|
1069
1089
|
| primary_value tCOLON2 tIDENTIFIER
|
1070
1090
|
{
|
1071
|
-
$$ = attrset($1, $3);
|
1091
|
+
$$ = attrset($1, parser_intern("::"), $3);
|
1072
1092
|
}
|
1073
|
-
| primary_value
|
1093
|
+
| primary_value call_op tCONSTANT
|
1074
1094
|
{
|
1075
|
-
$$ = attrset($1, $3);
|
1095
|
+
$$ = attrset($1, $2, $3);
|
1076
1096
|
}
|
1077
1097
|
| primary_value tCOLON2 tCONSTANT
|
1078
1098
|
{
|
@@ -1107,17 +1127,17 @@ lhs : user_variable
|
|
1107
1127
|
{
|
1108
1128
|
$$ = aryset($1, $3);
|
1109
1129
|
}
|
1110
|
-
| primary_value
|
1130
|
+
| primary_value call_op tIDENTIFIER
|
1111
1131
|
{
|
1112
|
-
$$ = attrset($1, $3);
|
1132
|
+
$$ = attrset($1, $2, $3);
|
1113
1133
|
}
|
1114
1134
|
| primary_value tCOLON2 tIDENTIFIER
|
1115
1135
|
{
|
1116
|
-
$$ = attrset($1, $3);
|
1136
|
+
$$ = attrset($1, parser_intern("::"), $3);
|
1117
1137
|
}
|
1118
|
-
| primary_value
|
1138
|
+
| primary_value call_op tCONSTANT
|
1119
1139
|
{
|
1120
|
-
$$ = attrset($1, $3);
|
1140
|
+
$$ = attrset($1, $2, $3);
|
1121
1141
|
}
|
1122
1142
|
| primary_value tCOLON2 tCONSTANT
|
1123
1143
|
{
|
@@ -1164,12 +1184,12 @@ fname : tIDENTIFIER
|
|
1164
1184
|
| tFID
|
1165
1185
|
| op
|
1166
1186
|
{
|
1167
|
-
|
1187
|
+
SET_LEX_STATE(EXPR_ENDFN);
|
1168
1188
|
$$ = convert_op($1);
|
1169
1189
|
}
|
1170
1190
|
| reswords
|
1171
1191
|
{
|
1172
|
-
|
1192
|
+
SET_LEX_STATE(EXPR_ENDFN);
|
1173
1193
|
$$ = $<id>1;
|
1174
1194
|
}
|
1175
1195
|
;
|
@@ -1189,7 +1209,7 @@ undef_list : fitem
|
|
1189
1209
|
{
|
1190
1210
|
$$ = NEW_UNDEF($1);
|
1191
1211
|
}
|
1192
|
-
| undef_list ',' {
|
1212
|
+
| undef_list ',' {SET_LEX_STATE(EXPR_FNAME | EXPR_FITEM);} fitem
|
1193
1213
|
{
|
1194
1214
|
$$ = block_append($1, NEW_UNDEF($4));
|
1195
1215
|
}
|
@@ -1284,20 +1304,20 @@ arg : lhs '=' arg
|
|
1284
1304
|
$$ = NEW_OP_ASGN1($1, $5, args);
|
1285
1305
|
fixpos($$, $1);
|
1286
1306
|
}
|
1287
|
-
| primary_value
|
1307
|
+
| primary_value call_op tIDENTIFIER tOP_ASGN arg
|
1288
1308
|
{
|
1289
1309
|
value_expr($5);
|
1290
|
-
$$ = new_attr_op_assign($1, $3, $4, $5);
|
1310
|
+
$$ = new_attr_op_assign($1, $2, $3, $4, $5);
|
1291
1311
|
}
|
1292
|
-
| primary_value
|
1312
|
+
| primary_value call_op tCONSTANT tOP_ASGN arg
|
1293
1313
|
{
|
1294
1314
|
value_expr($5);
|
1295
|
-
$$ = new_attr_op_assign($1, $3, $4, $5);
|
1315
|
+
$$ = new_attr_op_assign($1, $2, $3, $4, $5);
|
1296
1316
|
}
|
1297
1317
|
| primary_value tCOLON2 tIDENTIFIER tOP_ASGN arg
|
1298
1318
|
{
|
1299
1319
|
value_expr($5);
|
1300
|
-
$$ = new_attr_op_assign($1, $3, $4, $5);
|
1320
|
+
$$ = new_attr_op_assign($1, parser_intern("::"), $3, $4, $5);
|
1301
1321
|
}
|
1302
1322
|
| primary_value tCOLON2 tCONSTANT tOP_ASGN arg
|
1303
1323
|
{
|
@@ -1521,12 +1541,12 @@ call_args : command
|
|
1521
1541
|
}
|
1522
1542
|
| assocs opt_block_arg
|
1523
1543
|
{
|
1524
|
-
$$ = NEW_LIST(NEW_HASH($1));
|
1544
|
+
$$ = NEW_LIST($1 ? NEW_HASH($1) : 0);
|
1525
1545
|
$$ = arg_blk_pass($$, $2);
|
1526
1546
|
}
|
1527
1547
|
| args ',' assocs opt_block_arg
|
1528
1548
|
{
|
1529
|
-
$$ = arg_append($1, NEW_HASH($3));
|
1549
|
+
$$ = $3 ? arg_append($1, NEW_HASH($3)) : $1;
|
1530
1550
|
$$ = arg_blk_pass($$, $4);
|
1531
1551
|
}
|
1532
1552
|
| block_arg
|
@@ -1650,13 +1670,19 @@ primary : literal
|
|
1650
1670
|
}
|
1651
1671
|
nd_set_line($$, $<num>2);
|
1652
1672
|
}
|
1653
|
-
| tLPAREN_ARG {
|
1673
|
+
| tLPAREN_ARG {SET_LEX_STATE(EXPR_ENDARG);} rparen
|
1654
1674
|
{
|
1655
1675
|
$$ = 0;
|
1656
1676
|
}
|
1657
|
-
| tLPAREN_ARG
|
1677
|
+
| tLPAREN_ARG
|
1658
1678
|
{
|
1659
|
-
|
1679
|
+
$<val>1 = cmdarg_stack;
|
1680
|
+
cmdarg_stack = 0;
|
1681
|
+
}
|
1682
|
+
expr {SET_LEX_STATE(EXPR_ENDARG);} rparen
|
1683
|
+
{
|
1684
|
+
cmdarg_stack = $<val>1;
|
1685
|
+
$$ = $3;
|
1660
1686
|
}
|
1661
1687
|
| tLPAREN compstmt ')'
|
1662
1688
|
{
|
@@ -1861,11 +1887,14 @@ primary : literal
|
|
1861
1887
|
in_def--;
|
1862
1888
|
cur_mid = $<id>3;
|
1863
1889
|
}
|
1864
|
-
| k_def singleton dot_or_colon {
|
1890
|
+
| k_def singleton dot_or_colon {SET_LEX_STATE(EXPR_FNAME);} fname
|
1865
1891
|
{
|
1866
|
-
in_single
|
1867
|
-
|
1892
|
+
$<num>4 = in_single;
|
1893
|
+
in_single = 1;
|
1894
|
+
SET_LEX_STATE(EXPR_ENDFN | EXPR_LABEL); /* force for args */
|
1868
1895
|
local_push(0);
|
1896
|
+
$<id>$ = current_arg;
|
1897
|
+
current_arg = 0;
|
1869
1898
|
}
|
1870
1899
|
f_arglist
|
1871
1900
|
bodystmt
|
@@ -1875,7 +1904,8 @@ primary : literal
|
|
1875
1904
|
$$ = NEW_DEFS($2, $5, $7, body);
|
1876
1905
|
nd_set_line($$, $<num>1);
|
1877
1906
|
local_pop();
|
1878
|
-
in_single
|
1907
|
+
in_single = $<num>4 & 1;
|
1908
|
+
current_arg = $<id>6;
|
1879
1909
|
}
|
1880
1910
|
| keyword_break
|
1881
1911
|
{
|
@@ -2179,7 +2209,7 @@ opt_bv_decl : opt_nl
|
|
2179
2209
|
{
|
2180
2210
|
$$ = 0;
|
2181
2211
|
}
|
2182
|
-
| opt_nl ';' bv_decls
|
2212
|
+
| opt_nl ';' bv_decls opt_nl
|
2183
2213
|
{
|
2184
2214
|
// This is deliberately different than MRI.
|
2185
2215
|
$$ = $3;
|
@@ -2276,21 +2306,21 @@ block_call : command do_block
|
|
2276
2306
|
$$ = $2;
|
2277
2307
|
fixpos($$, $1);
|
2278
2308
|
}
|
2279
|
-
| block_call
|
2309
|
+
| block_call call_op2 operation2 opt_paren_args
|
2280
2310
|
{
|
2281
|
-
$$ =
|
2311
|
+
$$ = NEW_QCALL($2, $1, $3, $4);
|
2282
2312
|
}
|
2283
|
-
| block_call
|
2313
|
+
| block_call call_op2 operation2 opt_paren_args brace_block
|
2284
2314
|
{
|
2285
2315
|
block_dup_check($4, $5);
|
2286
|
-
$5->nd_iter =
|
2316
|
+
$5->nd_iter = NEW_QCALL($2, $1, $3, $4);
|
2287
2317
|
$$ = $5;
|
2288
2318
|
fixpos($$, $1);
|
2289
2319
|
}
|
2290
|
-
| block_call
|
2320
|
+
| block_call call_op2 operation2 command_args do_block
|
2291
2321
|
{
|
2292
2322
|
block_dup_check($4, $5);
|
2293
|
-
$5->nd_iter =
|
2323
|
+
$5->nd_iter = NEW_QCALL($2, $1, $3, $4);
|
2294
2324
|
$$ = $5;
|
2295
2325
|
fixpos($$, $1);
|
2296
2326
|
}
|
@@ -2302,13 +2332,13 @@ method_call : fcall paren_args
|
|
2302
2332
|
$$->nd_args = $2;
|
2303
2333
|
fixpos($$, $2);
|
2304
2334
|
}
|
2305
|
-
| primary_value
|
2335
|
+
| primary_value call_op operation2
|
2306
2336
|
{
|
2307
2337
|
$<num>$ = sourceline;
|
2308
2338
|
}
|
2309
2339
|
opt_paren_args
|
2310
2340
|
{
|
2311
|
-
$$ =
|
2341
|
+
$$ = NEW_QCALL($2, $1, $3, $5);
|
2312
2342
|
nd_set_line($$, $<num>4);
|
2313
2343
|
}
|
2314
2344
|
| primary_value tCOLON2 operation2
|
@@ -2324,13 +2354,13 @@ method_call : fcall paren_args
|
|
2324
2354
|
{
|
2325
2355
|
$$ = NEW_CALL($1, $3, 0);
|
2326
2356
|
}
|
2327
|
-
| primary_value
|
2357
|
+
| primary_value call_op
|
2328
2358
|
{
|
2329
2359
|
$<num>$ = sourceline;
|
2330
2360
|
}
|
2331
2361
|
paren_args
|
2332
2362
|
{
|
2333
|
-
$$ =
|
2363
|
+
$$ = NEW_QCALL($2, $1, parser_intern("call"), $4);
|
2334
2364
|
nd_set_line($$, $<num>3);
|
2335
2365
|
}
|
2336
2366
|
| primary_value tCOLON2
|
@@ -2469,6 +2499,8 @@ string : tCHAR
|
|
2469
2499
|
|
2470
2500
|
string1 : tSTRING_BEG string_contents tSTRING_END
|
2471
2501
|
{
|
2502
|
+
heredoc_dedent($2);
|
2503
|
+
heredoc_indent = 0;
|
2472
2504
|
$$ = $2;
|
2473
2505
|
}
|
2474
2506
|
;
|
@@ -2476,6 +2508,10 @@ string1 : tSTRING_BEG string_contents tSTRING_END
|
|
2476
2508
|
xstring : tXSTRING_BEG xstring_contents tSTRING_END
|
2477
2509
|
{
|
2478
2510
|
NODE *node = $2;
|
2511
|
+
|
2512
|
+
heredoc_dedent($2);
|
2513
|
+
heredoc_indent = 0;
|
2514
|
+
|
2479
2515
|
if(!node) {
|
2480
2516
|
node = NEW_XSTR(STR_NEW0());
|
2481
2517
|
} else {
|
@@ -2700,7 +2736,7 @@ string_content : tSTRING_CONTENT
|
|
2700
2736
|
{
|
2701
2737
|
$<node>$ = lex_strterm;
|
2702
2738
|
lex_strterm = 0;
|
2703
|
-
|
2739
|
+
SET_LEX_STATE(EXPR_BEG);
|
2704
2740
|
}
|
2705
2741
|
string_dvar
|
2706
2742
|
{
|
@@ -2717,21 +2753,31 @@ string_content : tSTRING_CONTENT
|
|
2717
2753
|
{
|
2718
2754
|
$<node>$ = lex_strterm;
|
2719
2755
|
lex_strterm = 0;
|
2720
|
-
|
2756
|
+
}
|
2757
|
+
{
|
2758
|
+
$<num>$ = lex_state;
|
2759
|
+
SET_LEX_STATE(EXPR_BEG);
|
2721
2760
|
}
|
2722
2761
|
{
|
2723
2762
|
$<num>$ = brace_nest;
|
2724
2763
|
brace_nest = 0;
|
2725
2764
|
}
|
2765
|
+
{
|
2766
|
+
$<num>$ = heredoc_indent;
|
2767
|
+
heredoc_indent = 0;
|
2768
|
+
}
|
2726
2769
|
compstmt tSTRING_DEND
|
2727
2770
|
{
|
2728
2771
|
cond_stack = $<val>1;
|
2729
2772
|
cmdarg_stack = $<val>2;
|
2730
2773
|
lex_strterm = $<node>3;
|
2731
|
-
|
2774
|
+
SET_LEX_STATE($<num>4);
|
2775
|
+
brace_nest = $<num>5;
|
2776
|
+
heredoc_indent = $<num>6;
|
2777
|
+
heredoc_line_indent = -1;
|
2732
2778
|
|
2733
|
-
if($
|
2734
|
-
$$ = new_evstr($
|
2779
|
+
if($7) $7->flags &= ~NODE_FL_NEWLINE;
|
2780
|
+
$$ = new_evstr($7);
|
2735
2781
|
}
|
2736
2782
|
;
|
2737
2783
|
|
@@ -2743,7 +2789,7 @@ string_dvar : tGVAR {$$ = NEW_GVAR($1);}
|
|
2743
2789
|
|
2744
2790
|
symbol : tSYMBEG sym
|
2745
2791
|
{
|
2746
|
-
|
2792
|
+
SET_LEX_STATE(EXPR_END);
|
2747
2793
|
$$ = $2;
|
2748
2794
|
}
|
2749
2795
|
;
|
@@ -2756,7 +2802,7 @@ sym : fname
|
|
2756
2802
|
|
2757
2803
|
dsym : tSYMBEG xstring_contents tSTRING_END
|
2758
2804
|
{
|
2759
|
-
|
2805
|
+
SET_LEX_STATE(EXPR_END);
|
2760
2806
|
$$ = dsym_node($2);
|
2761
2807
|
}
|
2762
2808
|
;
|
@@ -2818,20 +2864,16 @@ backref : tNTH_REF
|
|
2818
2864
|
| tBACK_REF
|
2819
2865
|
;
|
2820
2866
|
|
2821
|
-
superclass : term
|
2867
|
+
superclass : '<'
|
2822
2868
|
{
|
2823
|
-
|
2824
|
-
}
|
2825
|
-
| '<'
|
2826
|
-
{
|
2827
|
-
lex_state = EXPR_BEG;
|
2869
|
+
SET_LEX_STATE(EXPR_BEG);
|
2828
2870
|
command_start = TRUE;
|
2829
2871
|
}
|
2830
2872
|
expr_value term
|
2831
2873
|
{
|
2832
2874
|
$$ = $3;
|
2833
2875
|
}
|
2834
|
-
|
|
2876
|
+
| /* none */
|
2835
2877
|
{
|
2836
2878
|
yyerrok;
|
2837
2879
|
$$ = 0;
|
@@ -2841,13 +2883,19 @@ superclass : term
|
|
2841
2883
|
f_arglist : '(' f_args rparen
|
2842
2884
|
{
|
2843
2885
|
$$ = $2;
|
2844
|
-
|
2886
|
+
SET_LEX_STATE(EXPR_BEG);
|
2845
2887
|
command_start = TRUE;
|
2846
2888
|
}
|
2847
|
-
|
|
2889
|
+
| {
|
2890
|
+
$<num>$ = in_kwarg;
|
2891
|
+
in_kwarg = 1;
|
2892
|
+
SET_LEX_STATE(lex_state | EXPR_LABEL); /* force for args */
|
2893
|
+
}
|
2894
|
+
f_args term
|
2848
2895
|
{
|
2849
|
-
|
2850
|
-
|
2896
|
+
in_kwarg = !!$<num>1;
|
2897
|
+
$$ = $2;
|
2898
|
+
SET_LEX_STATE(EXPR_BEG);
|
2851
2899
|
command_start = TRUE;
|
2852
2900
|
}
|
2853
2901
|
;
|
@@ -2973,9 +3021,18 @@ f_norm_arg : f_bad_arg
|
|
2973
3021
|
}
|
2974
3022
|
;
|
2975
3023
|
|
2976
|
-
|
3024
|
+
f_arg_asgn : f_norm_arg
|
3025
|
+
{
|
3026
|
+
ID id = get_id($1);
|
3027
|
+
arg_var(id);
|
3028
|
+
current_arg = id;
|
3029
|
+
$$ = $1;
|
3030
|
+
}
|
3031
|
+
;
|
3032
|
+
|
3033
|
+
f_arg_item : f_arg_asgn
|
2977
3034
|
{
|
2978
|
-
|
3035
|
+
current_arg = 0;
|
2979
3036
|
$$ = NEW_ARGS_AUX($1, 1);
|
2980
3037
|
}
|
2981
3038
|
| tLPAREN f_margs rparen
|
@@ -2999,18 +3056,22 @@ f_arg : f_arg_item
|
|
2999
3056
|
|
3000
3057
|
f_label : tLABEL
|
3001
3058
|
{
|
3002
|
-
|
3059
|
+
ID id = get_id($1);
|
3060
|
+
arg_var(formal_argument(id));
|
3061
|
+
current_arg = id;
|
3003
3062
|
$$ = $1;
|
3004
3063
|
}
|
3005
3064
|
;
|
3006
3065
|
|
3007
3066
|
f_kw : f_label arg_value
|
3008
3067
|
{
|
3068
|
+
current_arg = 0;
|
3009
3069
|
$$ = assignable($1, $2);
|
3010
3070
|
$$ = NEW_KW_ARG(0, $$);
|
3011
3071
|
}
|
3012
3072
|
| f_label
|
3013
3073
|
{
|
3074
|
+
current_arg = 0;
|
3014
3075
|
$$ = assignable($1, NEW_REQ_KW);
|
3015
3076
|
$$ = NEW_KW_ARG(0, $$);
|
3016
3077
|
}
|
@@ -3035,7 +3096,7 @@ f_block_kwarg : f_block_kw
|
|
3035
3096
|
| f_block_kwarg ',' f_block_kw
|
3036
3097
|
{
|
3037
3098
|
NODE *kws = $1;
|
3038
|
-
while
|
3099
|
+
while(kws->nd_next) {
|
3039
3100
|
kws = kws->nd_next;
|
3040
3101
|
}
|
3041
3102
|
kws->nd_next = $3;
|
@@ -3050,7 +3111,7 @@ f_kwarg : f_kw
|
|
3050
3111
|
| f_kwarg ',' f_kw
|
3051
3112
|
{
|
3052
3113
|
NODE *kws = $1;
|
3053
|
-
while
|
3114
|
+
while(kws->nd_next) {
|
3054
3115
|
kws = kws->nd_next;
|
3055
3116
|
}
|
3056
3117
|
kws->nd_next = $3;
|
@@ -3070,20 +3131,21 @@ f_kwrest : kwrest_mark tIDENTIFIER
|
|
3070
3131
|
| kwrest_mark
|
3071
3132
|
{
|
3072
3133
|
$$ = internal_id();
|
3134
|
+
arg_var($$);
|
3073
3135
|
}
|
3074
3136
|
;
|
3075
3137
|
|
3076
|
-
f_opt :
|
3138
|
+
f_opt : f_arg_asgn '=' arg_value
|
3077
3139
|
{
|
3078
|
-
|
3140
|
+
current_arg = 0;
|
3079
3141
|
$$ = assignable($1, $3);
|
3080
3142
|
$$ = NEW_OPT_ARG(0, $$);
|
3081
3143
|
}
|
3082
3144
|
;
|
3083
3145
|
|
3084
|
-
f_block_opt :
|
3146
|
+
f_block_opt : f_arg_asgn '=' primary_value
|
3085
3147
|
{
|
3086
|
-
|
3148
|
+
current_arg = 0;
|
3087
3149
|
$$ = assignable($1, $3);
|
3088
3150
|
$$ = NEW_OPT_ARG(0, $$);
|
3089
3151
|
}
|
@@ -3169,7 +3231,7 @@ singleton : var_ref
|
|
3169
3231
|
$$ = $1;
|
3170
3232
|
if(!$$) $$ = NEW_NIL();
|
3171
3233
|
}
|
3172
|
-
| '(' {
|
3234
|
+
| '(' {SET_LEX_STATE(EXPR_BEG);} expr rparen
|
3173
3235
|
{
|
3174
3236
|
if($3 == 0) {
|
3175
3237
|
yy_error("can't define singleton method for ().");
|
@@ -3245,6 +3307,23 @@ dot_or_colon : '.'
|
|
3245
3307
|
| tCOLON2
|
3246
3308
|
;
|
3247
3309
|
|
3310
|
+
call_op : '.'
|
3311
|
+
{
|
3312
|
+
$$ = '.';
|
3313
|
+
}
|
3314
|
+
| tANDDOT
|
3315
|
+
{
|
3316
|
+
$$ = tANDDOT;
|
3317
|
+
}
|
3318
|
+
;
|
3319
|
+
|
3320
|
+
call_op2 : call_op
|
3321
|
+
| tCOLON2
|
3322
|
+
{
|
3323
|
+
$$ = tCOLON2;
|
3324
|
+
}
|
3325
|
+
;
|
3326
|
+
|
3248
3327
|
opt_terms : /* none */
|
3249
3328
|
| terms
|
3250
3329
|
;
|
@@ -3341,6 +3420,28 @@ static int parser_here_document(rb_parser_state*, NODE*);
|
|
3341
3420
|
|
3342
3421
|
#define parser_isascii() ISASCII(*(lex_p-1))
|
3343
3422
|
|
3423
|
+
static int token_info_get_column(rb_parser_state* parser_state, const char *pend) {
|
3424
|
+
int col = 1;
|
3425
|
+
const char *p;
|
3426
|
+
for(p = lex_pbeg; p < pend; p++) {
|
3427
|
+
if(*p == '\t') {
|
3428
|
+
col = (((col - 1) / TAB_WIDTH) + 1) * TAB_WIDTH;
|
3429
|
+
}
|
3430
|
+
col++;
|
3431
|
+
}
|
3432
|
+
return col;
|
3433
|
+
}
|
3434
|
+
|
3435
|
+
static int token_info_has_nonspaces(rb_parser_state* parser_state, const char *pend) {
|
3436
|
+
const char *p;
|
3437
|
+
for(p = lex_pbeg; p < pend; p++) {
|
3438
|
+
if(*p != ' ' && *p != '\t') {
|
3439
|
+
return 1;
|
3440
|
+
}
|
3441
|
+
}
|
3442
|
+
return 0;
|
3443
|
+
}
|
3444
|
+
|
3344
3445
|
static void parser_token_info_push(rb_parser_state* parser_state, const char *token) {
|
3345
3446
|
/* TODO */
|
3346
3447
|
}
|
@@ -3395,21 +3496,21 @@ must_be_ascii_compatible(VALUE s)
|
|
3395
3496
|
static VALUE
|
3396
3497
|
lex_get_str(rb_parser_state* parser_state, VALUE s)
|
3397
3498
|
{
|
3398
|
-
|
3399
|
-
|
3499
|
+
char *beg, *end, *start;
|
3500
|
+
long len;
|
3400
3501
|
|
3401
3502
|
beg = RSTRING_PTR(s);
|
3503
|
+
len = RSTRING_LEN(s);
|
3504
|
+
start = beg;
|
3402
3505
|
if(lex_gets_ptr) {
|
3403
|
-
if(
|
3506
|
+
if(len == lex_gets_ptr) return Qnil;
|
3404
3507
|
beg += lex_gets_ptr;
|
3508
|
+
len -= lex_gets_ptr;
|
3405
3509
|
}
|
3406
|
-
|
3407
|
-
end = beg;
|
3408
|
-
|
3409
|
-
|
3410
|
-
}
|
3411
|
-
lex_gets_ptr = end - RSTRING_PTR(s);
|
3412
|
-
return REF(parser_enc_str_new(beg, end - beg, enc));
|
3510
|
+
end = (char*)memchr(beg, '\n', len);
|
3511
|
+
if(end) len = ++end - beg;
|
3512
|
+
lex_gets_ptr += len;
|
3513
|
+
return REF(rb_str_subseq(s, beg - start, len));
|
3413
3514
|
}
|
3414
3515
|
|
3415
3516
|
static VALUE
|
@@ -3573,10 +3674,10 @@ parser_str_new(rb_parser_state* parser_state, const char *p, long n,
|
|
3573
3674
|
#define lex_eol_p() (lex_p >= lex_pend)
|
3574
3675
|
#define peek(c) (lex_p < lex_pend && (c) == *lex_p)
|
3575
3676
|
#define peek_n(c,n) (lex_p+(n) < lex_pend && (c) == (unsigned char)lex_p[n])
|
3677
|
+
#define peekc() peekc_n(0)
|
3678
|
+
#define peekc_n(n) (lex_p+(n) < lex_pend ? (unsigned char)lex_p[n] : -1)
|
3576
3679
|
|
3577
|
-
static inline int
|
3578
|
-
parser_nextc(rb_parser_state* parser_state)
|
3579
|
-
{
|
3680
|
+
static inline int parser_nextc(rb_parser_state* parser_state) {
|
3580
3681
|
int c;
|
3581
3682
|
|
3582
3683
|
if(lex_p == lex_pend) {
|
@@ -3614,11 +3715,12 @@ parser_nextc(rb_parser_state* parser_state)
|
|
3614
3715
|
return c;
|
3615
3716
|
}
|
3616
3717
|
|
3617
|
-
static void
|
3618
|
-
parser_pushback(rb_parser_state* parser_state, int c)
|
3619
|
-
{
|
3718
|
+
static void parser_pushback(rb_parser_state* parser_state, int c) {
|
3620
3719
|
if(c == -1) return;
|
3621
3720
|
lex_p--;
|
3721
|
+
if(lex_p > lex_pbeg && lex_p[0] == '\n' && lex_p[-1] == '\r') {
|
3722
|
+
lex_p--;
|
3723
|
+
}
|
3622
3724
|
}
|
3623
3725
|
|
3624
3726
|
/* Indicates if we're currently at the beginning of a line. */
|
@@ -3632,9 +3734,7 @@ parser_pushback(rb_parser_state* parser_state, int c)
|
|
3632
3734
|
#define toklen() tokidx
|
3633
3735
|
#define toklast() (tokidx>0?tokenbuf[tokidx-1]:0)
|
3634
3736
|
|
3635
|
-
static char*
|
3636
|
-
parser_newtok(rb_parser_state* parser_state)
|
3637
|
-
{
|
3737
|
+
static char* parser_newtok(rb_parser_state* parser_state) {
|
3638
3738
|
tokidx = 0;
|
3639
3739
|
tokline = sourceline;
|
3640
3740
|
if(!tokenbuf) {
|
@@ -3648,9 +3748,7 @@ parser_newtok(rb_parser_state* parser_state)
|
|
3648
3748
|
return tokenbuf;
|
3649
3749
|
}
|
3650
3750
|
|
3651
|
-
static char *
|
3652
|
-
parser_tokspace(rb_parser_state *parser_state, int n)
|
3653
|
-
{
|
3751
|
+
static char * parser_tokspace(rb_parser_state* parser_state, int n) {
|
3654
3752
|
tokidx += n;
|
3655
3753
|
|
3656
3754
|
if(tokidx >= toksiz) {
|
@@ -3663,8 +3761,7 @@ parser_tokspace(rb_parser_state *parser_state, int n)
|
|
3663
3761
|
}
|
3664
3762
|
|
3665
3763
|
|
3666
|
-
static void parser_tokadd(rb_parser_state* parser_state, char c)
|
3667
|
-
{
|
3764
|
+
static void parser_tokadd(rb_parser_state* parser_state, char c) {
|
3668
3765
|
assert(tokidx < toksiz && tokidx >= 0);
|
3669
3766
|
tokenbuf[tokidx++] = c;
|
3670
3767
|
if(tokidx >= toksiz) {
|
@@ -3673,9 +3770,7 @@ static void parser_tokadd(rb_parser_state* parser_state, char c)
|
|
3673
3770
|
}
|
3674
3771
|
}
|
3675
3772
|
|
3676
|
-
static int
|
3677
|
-
parser_tok_hex(rb_parser_state *parser_state, size_t *numlen)
|
3678
|
-
{
|
3773
|
+
static int parser_tok_hex(rb_parser_state* parser_state, size_t *numlen) {
|
3679
3774
|
int c;
|
3680
3775
|
|
3681
3776
|
c = scan_hex(lex_p, 2, numlen);
|
@@ -3690,7 +3785,7 @@ parser_tok_hex(rb_parser_state *parser_state, size_t *numlen)
|
|
3690
3785
|
#define tokcopy(n) memcpy(tokspace(n), lex_p - (n), (n))
|
3691
3786
|
|
3692
3787
|
static int
|
3693
|
-
parser_tokadd_utf8(rb_parser_state *parser_state, rb_encoding **encp,
|
3788
|
+
parser_tokadd_utf8(rb_parser_state* parser_state, rb_encoding** encp,
|
3694
3789
|
int string_literal, int symbol_literal, int regexp_literal)
|
3695
3790
|
{
|
3696
3791
|
/*
|
@@ -3740,7 +3835,8 @@ parser_tokadd_utf8(rb_parser_state *parser_state, rb_encoding **encp,
|
|
3740
3835
|
|
3741
3836
|
if(regexp_literal) tokadd('}');
|
3742
3837
|
nextc();
|
3743
|
-
} else {
|
3838
|
+
} else {
|
3839
|
+
/* handle \uxxxx form */
|
3744
3840
|
codepoint = scan_hex(lex_p, 4, &numlen);
|
3745
3841
|
if(numlen < 4) {
|
3746
3842
|
yy_error("invalid Unicode escape");
|
@@ -3763,55 +3859,46 @@ parser_tokadd_utf8(rb_parser_state *parser_state, rb_encoding **encp,
|
|
3763
3859
|
#define ESCAPE_CONTROL 1
|
3764
3860
|
#define ESCAPE_META 2
|
3765
3861
|
|
3766
|
-
static int
|
3767
|
-
|
3862
|
+
static int parser_read_escape(rb_parser_state* parser_state,
|
3863
|
+
int flags, rb_encoding **encp)
|
3768
3864
|
{
|
3769
3865
|
int c;
|
3770
3866
|
size_t numlen;
|
3771
3867
|
|
3772
3868
|
switch(c = nextc()) {
|
3773
|
-
case '\\':
|
3869
|
+
case '\\': /* Backslash */
|
3774
3870
|
return c;
|
3775
|
-
|
3776
|
-
case 'n': /* newline */
|
3871
|
+
case 'n': /* newline */
|
3777
3872
|
return '\n';
|
3778
|
-
|
3779
|
-
case 't': /* horizontal tab */
|
3873
|
+
case 't': /* horizontal tab */
|
3780
3874
|
return '\t';
|
3781
|
-
|
3782
|
-
case 'r': /* carriage-return */
|
3875
|
+
case 'r': /* carriage-return */
|
3783
3876
|
return '\r';
|
3784
|
-
|
3785
|
-
case 'f': /* form-feed */
|
3877
|
+
case 'f': /* form-feed */
|
3786
3878
|
return '\f';
|
3787
|
-
|
3788
|
-
case 'v': /* vertical tab */
|
3879
|
+
case 'v': /* vertical tab */
|
3789
3880
|
return '\13';
|
3790
|
-
|
3791
|
-
case 'a': /* alarm(bell) */
|
3881
|
+
case 'a': /* alarm(bell) */
|
3792
3882
|
return '\007';
|
3793
|
-
|
3794
|
-
case 'e': /* escape */
|
3883
|
+
case 'e': /* escape */
|
3795
3884
|
return 033;
|
3796
3885
|
|
3797
3886
|
case '0': case '1': case '2': case '3': /* octal constant */
|
3798
3887
|
case '4': case '5': case '6': case '7':
|
3799
|
-
if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
|
3800
3888
|
pushback(c);
|
3801
3889
|
c = scan_oct(lex_p, 3, &numlen);
|
3802
3890
|
lex_p += numlen;
|
3803
3891
|
return c;
|
3804
3892
|
|
3805
|
-
case 'x':
|
3806
|
-
if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
|
3893
|
+
case 'x': /* hex constant */
|
3807
3894
|
c = tok_hex(&numlen);
|
3808
3895
|
if(numlen == 0) return 0;
|
3809
3896
|
return c;
|
3810
3897
|
|
3811
|
-
case 'b':
|
3898
|
+
case 'b': /* backspace */
|
3812
3899
|
return '\010';
|
3813
3900
|
|
3814
|
-
case 's':
|
3901
|
+
case 's': /* space */
|
3815
3902
|
return ' ';
|
3816
3903
|
|
3817
3904
|
case 'M':
|
@@ -3856,16 +3943,12 @@ parser_read_escape(rb_parser_state *parser_state, int flags, rb_encoding **encp)
|
|
3856
3943
|
}
|
3857
3944
|
}
|
3858
3945
|
|
3859
|
-
static void
|
3860
|
-
parser_tokaddmbc(rb_parser_state* parser_state, int c, rb_encoding *enc)
|
3861
|
-
{
|
3946
|
+
static void parser_tokaddmbc(rb_parser_state* parser_state, int c, rb_encoding *enc) {
|
3862
3947
|
int len = parser_enc_codelen(c, enc);
|
3863
3948
|
parser_enc_mbcput(c, tokspace(len), enc);
|
3864
3949
|
}
|
3865
3950
|
|
3866
|
-
static int
|
3867
|
-
parser_tokadd_escape(rb_parser_state* parser_state, rb_encoding **encp)
|
3868
|
-
{
|
3951
|
+
static int parser_tokadd_escape(rb_parser_state* parser_state, rb_encoding **encp) {
|
3869
3952
|
int c;
|
3870
3953
|
int flags = 0;
|
3871
3954
|
size_t numlen;
|
@@ -3873,11 +3956,10 @@ parser_tokadd_escape(rb_parser_state* parser_state, rb_encoding **encp)
|
|
3873
3956
|
first:
|
3874
3957
|
switch(c = nextc()) {
|
3875
3958
|
case '\n':
|
3876
|
-
return 0;
|
3959
|
+
return 0; /* just ignore */
|
3877
3960
|
|
3878
3961
|
case '0': case '1': case '2': case '3': /* octal constant */
|
3879
3962
|
case '4': case '5': case '6': case '7':
|
3880
|
-
if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
|
3881
3963
|
{
|
3882
3964
|
scan_oct(--lex_p, 3, &numlen);
|
3883
3965
|
if(numlen == 0) goto eof;
|
@@ -3886,8 +3968,7 @@ first:
|
|
3886
3968
|
}
|
3887
3969
|
return 0;
|
3888
3970
|
|
3889
|
-
case 'x':
|
3890
|
-
if(flags & (ESCAPE_CONTROL|ESCAPE_META)) goto eof;
|
3971
|
+
case 'x': /* hex constant */
|
3891
3972
|
{
|
3892
3973
|
tok_hex(&numlen);
|
3893
3974
|
if(numlen == 0) goto eof;
|
@@ -3938,9 +4019,7 @@ eof:
|
|
3938
4019
|
return 0;
|
3939
4020
|
}
|
3940
4021
|
|
3941
|
-
static int
|
3942
|
-
parser_regx_options(rb_parser_state* parser_state)
|
3943
|
-
{
|
4022
|
+
static int parser_regx_options(rb_parser_state* parser_state) {
|
3944
4023
|
int kcode = 0;
|
3945
4024
|
int options = 0;
|
3946
4025
|
int c;
|
@@ -3993,9 +4072,7 @@ parser_regx_options(rb_parser_state* parser_state)
|
|
3993
4072
|
return options | kcode;
|
3994
4073
|
}
|
3995
4074
|
|
3996
|
-
static int
|
3997
|
-
parser_tokadd_mbchar(rb_parser_state *parser_state, int c)
|
3998
|
-
{
|
4075
|
+
static int parser_tokadd_mbchar(rb_parser_state* parser_state, int c) {
|
3999
4076
|
int len = parser_precise_mbclen();
|
4000
4077
|
if(!MBCLEN_CHARFOUND_P(len)) {
|
4001
4078
|
rb_compile_error(parser_state, "invalid multibyte char (%s)", parser_encoding_name());
|
@@ -4009,9 +4086,42 @@ parser_tokadd_mbchar(rb_parser_state *parser_state, int c)
|
|
4009
4086
|
|
4010
4087
|
#define tokadd_mbchar(c) parser_tokadd_mbchar(parser_state, c)
|
4011
4088
|
|
4089
|
+
static inline int simple_re_meta(int c) {
|
4090
|
+
switch(c) {
|
4091
|
+
case '$': case '*': case '+': case '.':
|
4092
|
+
case '?': case '^': case '|':
|
4093
|
+
case ')': case ']': case '}': case '>':
|
4094
|
+
return TRUE;
|
4095
|
+
default:
|
4096
|
+
return FALSE;
|
4097
|
+
}
|
4098
|
+
}
|
4099
|
+
|
4100
|
+
/*
 * Feed one heredoc body character `c` into the indentation tracker.
 *
 * heredoc_line_indent holds the column reached so far on the current line,
 * or -1 once a non-blank character has been seen on it. heredoc_indent is
 * lowered to the smallest column at which a line's first non-blank
 * character appeared.
 *
 * Returns TRUE when `c` was counted as indentation (space or tab) and FALSE
 * otherwise.
 */
static int parser_update_heredoc_indent(rb_parser_state* parser_state, int c) {
  if(heredoc_line_indent == -1) {
    /* This line is already measured; only a newline restarts counting. */
    if(c == '\n') heredoc_line_indent = 0;
    return FALSE;
  }

  switch(c) {
  case ' ':
    heredoc_line_indent++;
    return TRUE;

  case '\t':
    /* advance to the next multiple of TAB_WIDTH */
    heredoc_line_indent = ((heredoc_line_indent / TAB_WIDTH) + 1) * TAB_WIDTH;
    return TRUE;

  case '\n':
    /* a newline while still counting leaves both counters untouched */
    return FALSE;

  default:
    /* first non-blank character: record the minimum, stop counting */
    if(heredoc_indent > heredoc_line_indent) {
      heredoc_indent = heredoc_line_indent;
    }
    heredoc_line_indent = -1;
    return FALSE;
  }
}
|
4120
|
+
|
4012
4121
|
static int
|
4013
|
-
parser_tokadd_string(rb_parser_state
|
4014
|
-
int func, int term, int paren, long *nest,
|
4122
|
+
parser_tokadd_string(rb_parser_state* parser_state,
|
4123
|
+
int func, int term, int paren, long *nest,
|
4124
|
+
rb_encoding **encp)
|
4015
4125
|
{
|
4016
4126
|
int c;
|
4017
4127
|
int has_nonascii = 0;
|
@@ -4019,111 +4129,123 @@ parser_tokadd_string(rb_parser_state *parser_state,
|
|
4019
4129
|
char *errbuf = 0;
|
4020
4130
|
static const char mixed_msg[] = "%s mixed within %s source";
|
4021
4131
|
|
4022
|
-
#define mixed_error(enc1, enc2) if(!errbuf) {
|
4023
|
-
|
4024
|
-
|
4025
|
-
|
4026
|
-
|
4027
|
-
|
4028
|
-
|
4029
|
-
|
4030
|
-
|
4031
|
-
|
4032
|
-
|
4033
|
-
|
4034
|
-
|
4035
|
-
|
4036
|
-
|
4037
|
-
|
4038
|
-
|
4039
|
-
|
4040
|
-
|
4041
|
-
|
4042
|
-
if(!nest || !*nest) {
|
4043
|
-
pushback(c);
|
4044
|
-
break;
|
4045
|
-
}
|
4046
|
-
--*nest;
|
4047
|
-
} else if((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) {
|
4048
|
-
int c2 = *lex_p;
|
4049
|
-
if(c2 == '$' || c2 == '@' || c2 == '{') {
|
4050
|
-
pushback(c);
|
4051
|
-
break;
|
4132
|
+
#define mixed_error(enc1, enc2) if(!errbuf) { \
|
4133
|
+
size_t len = sizeof(mixed_msg) - 4; \
|
4134
|
+
len += strlen(rb_enc_name(enc1)); \
|
4135
|
+
len += strlen(rb_enc_name(enc2)); \
|
4136
|
+
errbuf = ALLOCA_N(char, len); \
|
4137
|
+
snprintf(errbuf, len, mixed_msg, \
|
4138
|
+
rb_enc_name(enc1), \
|
4139
|
+
rb_enc_name(enc2)); \
|
4140
|
+
yy_error(errbuf); \
|
4141
|
+
}
|
4142
|
+
#define mixed_escape(beg, enc1, enc2) do { \
|
4143
|
+
const char *pos = lex_p; \
|
4144
|
+
lex_p = (beg); \
|
4145
|
+
mixed_error((enc1), (enc2)); \
|
4146
|
+
lex_p = pos; \
|
4147
|
+
} while(0)
|
4148
|
+
|
4149
|
+
while((c = nextc()) != -1) {
|
4150
|
+
if(heredoc_indent > 0) {
|
4151
|
+
parser_update_heredoc_indent(parser_state, c);
|
4052
4152
|
}
|
4053
|
-
|
4054
|
-
|
4055
|
-
c
|
4056
|
-
|
4057
|
-
|
4058
|
-
if(func & STR_FUNC_QWORDS) break;
|
4059
|
-
if(func & STR_FUNC_EXPAND) continue;
|
4060
|
-
tokadd('\\');
|
4061
|
-
break;
|
4062
|
-
|
4063
|
-
case '\\':
|
4064
|
-
if(func & STR_FUNC_ESCAPE) tokadd(c);
|
4065
|
-
break;
|
4066
|
-
|
4067
|
-
case 'u':
|
4068
|
-
if((func & STR_FUNC_EXPAND) == 0) {
|
4069
|
-
tokadd('\\');
|
4153
|
+
if(paren && c == paren) {
|
4154
|
+
++*nest;
|
4155
|
+
} else if(c == term) {
|
4156
|
+
if(!nest || !*nest) {
|
4157
|
+
pushback(c);
|
4070
4158
|
break;
|
4071
4159
|
}
|
4072
|
-
|
4073
|
-
|
4074
|
-
|
4075
|
-
|
4160
|
+
--*nest;
|
4161
|
+
} else if((func & STR_FUNC_EXPAND) && c == '#' && lex_p < lex_pend) {
|
4162
|
+
int c2 = *lex_p;
|
4163
|
+
if(c2 == '$' || c2 == '@' || c2 == '{') {
|
4164
|
+
pushback(c);
|
4165
|
+
break;
|
4076
4166
|
}
|
4077
|
-
|
4167
|
+
} else if(c == '\\') {
|
4168
|
+
const char *beg = lex_p - 1;
|
4169
|
+
c = nextc();
|
4170
|
+
switch (c) {
|
4171
|
+
case '\n':
|
4172
|
+
if(func & STR_FUNC_QWORDS) break;
|
4173
|
+
if(func & STR_FUNC_EXPAND) continue;
|
4174
|
+
tokadd('\\');
|
4175
|
+
break;
|
4078
4176
|
|
4079
|
-
|
4080
|
-
|
4081
|
-
|
4082
|
-
|
4083
|
-
|
4177
|
+
case '\\':
|
4178
|
+
if(func & STR_FUNC_ESCAPE) tokadd(c);
|
4179
|
+
break;
|
4180
|
+
|
4181
|
+
case 'u':
|
4182
|
+
if((func & STR_FUNC_EXPAND) == 0) {
|
4183
|
+
tokadd('\\');
|
4184
|
+
break;
|
4185
|
+
}
|
4186
|
+
parser_tokadd_utf8(parser_state, &enc, 1,
|
4187
|
+
func & STR_FUNC_SYMBOL,
|
4188
|
+
func & STR_FUNC_REGEXP);
|
4084
4189
|
if(has_nonascii && enc != *encp) {
|
4085
4190
|
mixed_escape(beg, enc, *encp);
|
4086
4191
|
}
|
4087
4192
|
continue;
|
4088
|
-
|
4089
|
-
|
4090
|
-
if(
|
4091
|
-
c = read_escape(0, &enc);
|
4193
|
+
|
4194
|
+
default:
|
4195
|
+
if(c == -1) return -1;
|
4092
4196
|
if(!ISASCII(c)) {
|
4093
|
-
if(
|
4197
|
+
if((func & STR_FUNC_EXPAND) == 0) tokadd('\\');
|
4198
|
+
goto non_ascii;
|
4199
|
+
}
|
4200
|
+
if(func & STR_FUNC_REGEXP) {
|
4201
|
+
if(c == term && !simple_re_meta(c)) {
|
4202
|
+
tokadd(c);
|
4203
|
+
continue;
|
4204
|
+
}
|
4205
|
+
pushback(c);
|
4206
|
+
if((c = tokadd_escape(&enc)) < 0) {
|
4207
|
+
return -1;
|
4208
|
+
}
|
4209
|
+
if(has_nonascii && enc != *encp) {
|
4210
|
+
mixed_escape(beg, enc, *encp);
|
4211
|
+
}
|
4212
|
+
continue;
|
4213
|
+
} else if(func & STR_FUNC_EXPAND) {
|
4214
|
+
pushback(c);
|
4215
|
+
if(func & STR_FUNC_ESCAPE) tokadd('\\');
|
4216
|
+
c = read_escape(0, &enc);
|
4217
|
+
} else if((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
|
4218
|
+
/* ignore backslashed spaces in %w */
|
4219
|
+
} else if(c != term && !(paren && c == paren)) {
|
4220
|
+
tokadd('\\');
|
4221
|
+
pushback(c);
|
4094
4222
|
continue;
|
4095
4223
|
}
|
4096
|
-
}
|
4097
|
-
|
4098
|
-
|
4099
|
-
|
4100
|
-
|
4224
|
+
}
|
4225
|
+
} else if(!parser_isascii()) {
|
4226
|
+
non_ascii:
|
4227
|
+
has_nonascii = 1;
|
4228
|
+
if(enc != *encp) {
|
4229
|
+
mixed_error(enc, *encp);
|
4101
4230
|
continue;
|
4102
4231
|
}
|
4103
|
-
|
4104
|
-
} else if(!parser_isascii()) {
|
4105
|
-
has_nonascii = 1;
|
4106
|
-
if(enc != *encp) {
|
4107
|
-
mixed_error(enc, *encp);
|
4232
|
+
if(tokadd_mbchar(c) == -1) return -1;
|
4108
4233
|
continue;
|
4234
|
+
} else if((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
|
4235
|
+
pushback(c);
|
4236
|
+
break;
|
4109
4237
|
}
|
4110
|
-
if(
|
4111
|
-
|
4112
|
-
|
4113
|
-
|
4114
|
-
|
4115
|
-
|
4116
|
-
if(c & 0x80) {
|
4117
|
-
has_nonascii = 1;
|
4118
|
-
if(enc != *encp) {
|
4119
|
-
mixed_error(enc, *encp);
|
4120
|
-
continue;
|
4238
|
+
if(c & 0x80) {
|
4239
|
+
has_nonascii = 1;
|
4240
|
+
if(enc != *encp) {
|
4241
|
+
mixed_error(enc, *encp);
|
4242
|
+
continue;
|
4243
|
+
}
|
4121
4244
|
}
|
4245
|
+
tokadd(c);
|
4122
4246
|
}
|
4123
|
-
|
4124
|
-
|
4125
|
-
*encp = enc;
|
4126
|
-
return c;
|
4247
|
+
*encp = enc;
|
4248
|
+
return c;
|
4127
4249
|
}
|
4128
4250
|
|
4129
4251
|
#define NEW_STRTERM(func, term, paren) \
|
@@ -4133,12 +4255,12 @@ parser_tokadd_string(rb_parser_state *parser_state,
|
|
4133
4255
|
|
4134
4256
|
#define BIT(c, idx) (((c) / 32 - 1 == idx) ? (1U << ((c) % 32)) : 0)
|
4135
4257
|
#define SPECIAL_PUNCT(idx) ( \
|
4136
|
-
|
4137
|
-
|
4138
|
-
|
4139
|
-
|
4140
|
-
|
4141
|
-
|
4258
|
+
BIT('~', idx) | BIT('*', idx) | BIT('$', idx) | BIT('?', idx) | \
|
4259
|
+
BIT('!', idx) | BIT('@', idx) | BIT('/', idx) | BIT('\\', idx) | \
|
4260
|
+
BIT(';', idx) | BIT(',', idx) | BIT('.', idx) | BIT('=', idx) | \
|
4261
|
+
BIT(':', idx) | BIT('<', idx) | BIT('>', idx) | BIT('\"', idx) | \
|
4262
|
+
BIT('&', idx) | BIT('`', idx) | BIT('\'', idx) | BIT('+', idx) | \
|
4263
|
+
BIT('0', idx))
|
4142
4264
|
const unsigned int ruby_global_name_punct_bits[] = {
|
4143
4265
|
SPECIAL_PUNCT(0),
|
4144
4266
|
SPECIAL_PUNCT(1),
|
@@ -4147,16 +4269,12 @@ const unsigned int ruby_global_name_punct_bits[] = {
|
|
4147
4269
|
#undef BIT
|
4148
4270
|
#undef SPECIAL_PUNCT
|
4149
4271
|
|
4150
|
-
static inline int
|
4151
|
-
is_global_name_punct(const int c)
|
4152
|
-
{
|
4272
|
+
static inline int is_global_name_punct(const int c) {
|
4153
4273
|
if(c <= 0x20 || 0x7e < c) return 0;
|
4154
4274
|
return (ruby_global_name_punct_bits[(c - 0x20) / 32] >> (c % 32)) & 1;
|
4155
4275
|
}
|
4156
4276
|
|
4157
|
-
static int
|
4158
|
-
parser_peek_variable_name(rb_parser_state* parser_state)
|
4159
|
-
{
|
4277
|
+
static int parser_peek_variable_name(rb_parser_state* parser_state) {
|
4160
4278
|
int c;
|
4161
4279
|
const char *p = lex_p;
|
4162
4280
|
|
@@ -4165,7 +4283,7 @@ parser_peek_variable_name(rb_parser_state* parser_state)
|
|
4165
4283
|
switch(c) {
|
4166
4284
|
case '$':
|
4167
4285
|
if((c = *p) == '-') {
|
4168
|
-
if
|
4286
|
+
if(++p >= lex_pend) return 0;
|
4169
4287
|
c = *p;
|
4170
4288
|
} else if(is_global_name_punct(c) || ISDIGIT(c)) {
|
4171
4289
|
return tSTRING_DVAR;
|
@@ -4189,9 +4307,7 @@ parser_peek_variable_name(rb_parser_state* parser_state)
|
|
4189
4307
|
return 0;
|
4190
4308
|
}
|
4191
4309
|
|
4192
|
-
static int
|
4193
|
-
parser_parse_string(rb_parser_state* parser_state, NODE *quote)
|
4194
|
-
{
|
4310
|
+
static int parser_parse_string(rb_parser_state* parser_state, NODE *quote) {
|
4195
4311
|
int func = (int)quote->nd_func;
|
4196
4312
|
int term = nd_term(quote);
|
4197
4313
|
int paren = nd_paren(quote);
|
@@ -4248,15 +4364,18 @@ parser_parse_string(rb_parser_state* parser_state, NODE *quote)
|
|
4248
4364
|
|
4249
4365
|
/* Called when the lexer detects a heredoc is beginning. This pulls
|
4250
4366
|
in more characters and detects what kind of heredoc it is. */
|
4251
|
-
static int
|
4252
|
-
parser_heredoc_identifier(rb_parser_state* parser_state)
|
4253
|
-
{
|
4367
|
+
static int parser_heredoc_identifier(rb_parser_state* parser_state) {
|
4254
4368
|
int c = nextc(), term, func = 0;
|
4255
4369
|
size_t len;
|
4256
4370
|
|
4257
4371
|
if(c == '-') {
|
4258
4372
|
c = nextc();
|
4259
4373
|
func = STR_FUNC_INDENT;
|
4374
|
+
} else if(c == '~') {
|
4375
|
+
c = nextc();
|
4376
|
+
func = STR_FUNC_INDENT;
|
4377
|
+
heredoc_indent = INT_MAX;
|
4378
|
+
heredoc_line_indent = 0;
|
4260
4379
|
}
|
4261
4380
|
switch(c) {
|
4262
4381
|
case '\'':
|
@@ -4299,7 +4418,7 @@ parser_heredoc_identifier(rb_parser_state* parser_state)
|
|
4299
4418
|
if(!parser_is_identchar()) {
|
4300
4419
|
pushback(c);
|
4301
4420
|
if(func & STR_FUNC_INDENT) {
|
4302
|
-
pushback('-');
|
4421
|
+
pushback(heredoc_indent > 0 ? '~' : '-');
|
4303
4422
|
}
|
4304
4423
|
return 0;
|
4305
4424
|
}
|
@@ -4332,9 +4451,7 @@ parser_heredoc_identifier(rb_parser_state* parser_state)
|
|
4332
4451
|
return term == '`' ? tXSTRING_BEG : tSTRING_BEG;
|
4333
4452
|
}
|
4334
4453
|
|
4335
|
-
static void
|
4336
|
-
parser_heredoc_restore(rb_parser_state* parser_state, NODE *here)
|
4337
|
-
{
|
4454
|
+
static void parser_heredoc_restore(rb_parser_state* parser_state, NODE *here) {
|
4338
4455
|
VALUE line;
|
4339
4456
|
|
4340
4457
|
lex_strterm = 0;
|
@@ -4347,6 +4464,79 @@ parser_heredoc_restore(rb_parser_state* parser_state, NODE *here)
|
|
4347
4464
|
sourceline = nd_line(here);
|
4348
4465
|
}
|
4349
4466
|
|
4467
|
+
static int dedent_pos(const char *str, long len, int width) {
|
4468
|
+
int i, col = 0;
|
4469
|
+
|
4470
|
+
for(i = 0; i < len && col < width; i++) {
|
4471
|
+
if(str[i] == ' ') {
|
4472
|
+
col++;
|
4473
|
+
} else if(str[i] == '\t') {
|
4474
|
+
int n = TAB_WIDTH * (col / TAB_WIDTH + 1);
|
4475
|
+
if(n > width) break;
|
4476
|
+
col = n;
|
4477
|
+
} else {
|
4478
|
+
break;
|
4479
|
+
}
|
4480
|
+
}
|
4481
|
+
return i;
|
4482
|
+
}
|
4483
|
+
|
4484
|
+
/*
 * Remove up to `width` columns of leading indentation from the lines of the
 * string `input`, editing its buffer in place and shrinking its length.
 *
 * When `first` is false, the text before the first newline is left as is;
 * if there is no newline at all the string is returned unchanged.
 *
 * Always returns `input`.
 */
static VALUE parser_heredoc_dedent_string(VALUE input, int width, int first) {
  long len;
  char *str, *src, *dst, *end;

  RSTRING_GETMEM(input, str, len);
  end = str + len;

  src = str;
  if(!first) {
    /* skip the partial first line */
    src = (char*)memchr(src, '\n', end - src);
    if(!src) return input;
    src++;
  }

  /* dst trails src: each line is copied down over the indentation removed so far */
  for(dst = src; src < end; ) {
    char *line_end;

    src += dedent_pos(src, end - src, width);

    line_end = (char*)memchr(src, '\n', end - src);
    line_end = line_end ? line_end + 1 : end;

    if(src > dst) memmove(dst, src, line_end - src);
    dst += line_end - src;
    src = line_end;
  }
  rb_str_set_len(input, dst - str);

  return input;
}
|
4514
|
+
|
4515
|
+
/*
 * Strip the common indentation (heredoc_indent) from every string piece of
 * a heredoc whose first node is `root`.
 *
 * Does nothing when heredoc_indent is not positive. Starting with `root`,
 * each string piece's nd_lit is dedented in place; the following pieces are
 * found by walking the nd_next chain of NODE_ARRAY nodes and taking heads of
 * type NODE_STR or NODE_DSTR. Only the first piece is dedented from its very
 * beginning (`first`).
 */
static void parser_heredoc_dedent(rb_parser_state* parser_state, NODE *root) {
  NODE *node, *str_node;
  int first = TRUE;
  int indent = heredoc_indent;

  if(indent <= 0) return;

  node = str_node = root;

  while(str_node) {
    VALUE lit = str_node->nd_lit;
    if(NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
      rb_compile_error(parser_state, "dedent failure: %d: %ld", indent, lit);
    first = FALSE;

    /* find the next string piece, if any */
    str_node = 0;
    while((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
      if((str_node = node->nd_head) != 0) {
        int type = nd_type(str_node);
        if(type == NODE_STR || type == NODE_DSTR) break;
        /*
         * Not a string piece. Forget it so that, if the scan ends here,
         * the outer loop does not treat this node's nd_lit as a string.
         */
        str_node = 0;
      }
    }
  }
}
|
4539
|
+
|
4350
4540
|
static int
|
4351
4541
|
parser_whole_match_p(rb_parser_state* parser_state, const char *eos, ssize_t len, int indent)
|
4352
4542
|
{
|
@@ -4393,6 +4583,14 @@ parser_number_literal_suffix(rb_parser_state* parser_state, int mask)
|
|
4393
4583
|
return 0;
|
4394
4584
|
}
|
4395
4585
|
pushback(c);
|
4586
|
+
if(c == '.') {
|
4587
|
+
c = peekc_n(1);
|
4588
|
+
if(ISDIGIT(c)) {
|
4589
|
+
yy_error("unexpected fraction part after numeric literal");
|
4590
|
+
lex_p += 2;
|
4591
|
+
while(parser_is_identchar()) nextc();
|
4592
|
+
}
|
4593
|
+
}
|
4396
4594
|
break;
|
4397
4595
|
}
|
4398
4596
|
|
@@ -4432,7 +4630,7 @@ static int
|
|
4432
4630
|
parser_set_integer_literal(rb_parser_state* parser_state, VALUE v, int suffix)
|
4433
4631
|
{
|
4434
4632
|
int type = tINTEGER;
|
4435
|
-
if
|
4633
|
+
if(suffix & NUM_SUFFIX_R) {
|
4436
4634
|
v = rb_funcall(rb_cObject, rb_intern("Rational"), 1, v);
|
4437
4635
|
type = tRATIONAL;
|
4438
4636
|
}
|
@@ -4443,9 +4641,7 @@ parser_set_integer_literal(rb_parser_state* parser_state, VALUE v, int suffix)
|
|
4443
4641
|
is responsible for detecting any expansions (i.e. #{}) in the heredoc
|
4444
4642
|
and emitting a lex token and also detecting the end of the heredoc. */
|
4445
4643
|
|
4446
|
-
static int
|
4447
|
-
parser_here_document(rb_parser_state* parser_state, NODE *here)
|
4448
|
-
{
|
4644
|
+
static int parser_here_document(rb_parser_state* parser_state, NODE *here) {
|
4449
4645
|
int c, func, indent = 0;
|
4450
4646
|
const char *eos, *p, *pend;
|
4451
4647
|
ssize_t len;
|
@@ -4479,6 +4675,7 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
|
|
4479
4675
|
we find the identifier. */
|
4480
4676
|
|
4481
4677
|
if((func & STR_FUNC_EXPAND) == 0) {
|
4678
|
+
int end = 0;
|
4482
4679
|
do {
|
4483
4680
|
p = RSTRING_PTR(lex_lastline);
|
4484
4681
|
pend = lex_pend;
|
@@ -4493,6 +4690,15 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
|
|
4493
4690
|
--pend;
|
4494
4691
|
}
|
4495
4692
|
}
|
4693
|
+
|
4694
|
+
if(heredoc_indent > 0) {
|
4695
|
+
long i = 0;
|
4696
|
+
while(p + i < pend && parser_update_heredoc_indent(parser_state, p[i])) {
|
4697
|
+
i++;
|
4698
|
+
}
|
4699
|
+
heredoc_line_indent = 0;
|
4700
|
+
}
|
4701
|
+
|
4496
4702
|
if(str) {
|
4497
4703
|
rb_str_cat(str, p, pend - p);
|
4498
4704
|
} else {
|
@@ -4503,7 +4709,7 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
|
|
4503
4709
|
if(nextc() == -1) {
|
4504
4710
|
goto error;
|
4505
4711
|
}
|
4506
|
-
} while(!whole_match_p(eos, len, indent));
|
4712
|
+
} while(!(end = whole_match_p(eos, len, indent)));
|
4507
4713
|
} else {
|
4508
4714
|
newtok();
|
4509
4715
|
if(c == '#') {
|
@@ -4543,17 +4749,14 @@ parser_here_document(rb_parser_state* parser_state, NODE *here)
|
|
4543
4749
|
|
4544
4750
|
#include "lex.c.blt"
|
4545
4751
|
|
4546
|
-
static int
|
4547
|
-
|
4548
|
-
|
4549
|
-
rb_warning("ambiguous first argument; put parentheses or even spaces");
|
4752
|
+
static int parser_arg_ambiguous(rb_parser_state* parser_state, char c) {
|
4753
|
+
rb_warningS(
|
4754
|
+
"ambiguous first argument; put parentheses or a space even after `%c' operator", c);
|
4550
4755
|
|
4551
4756
|
return 1;
|
4552
4757
|
}
|
4553
4758
|
|
4554
|
-
static ID
|
4555
|
-
parser_formal_argument(rb_parser_state* parser_state, ID lhs)
|
4556
|
-
{
|
4759
|
+
static ID parser_formal_argument(rb_parser_state* parser_state, ID lhs) {
|
4557
4760
|
if(!is_local_id(lhs)) {
|
4558
4761
|
yy_error("formal argument must be local variable");
|
4559
4762
|
}
|
@@ -4561,14 +4764,11 @@ parser_formal_argument(rb_parser_state* parser_state, ID lhs)
|
|
4561
4764
|
return lhs;
|
4562
4765
|
}
|
4563
4766
|
|
4564
|
-
static bool
|
4565
|
-
parser_lvar_defined(rb_parser_state* parser_state, ID id) {
|
4767
|
+
static bool parser_lvar_defined(rb_parser_state* parser_state, ID id) {
|
4566
4768
|
return (in_block() && bv_defined(id)) || local_id(id);
|
4567
4769
|
}
|
4568
4770
|
|
4569
|
-
static long
|
4570
|
-
parser_encode_length(rb_parser_state* parser_state, const char *name, long len)
|
4571
|
-
{
|
4771
|
+
static long parser_encode_length(rb_parser_state* parser_state, const char *name, long len) {
|
4572
4772
|
long nlen;
|
4573
4773
|
|
4574
4774
|
if(len > 5 && name[nlen = len - 5] == '-') {
|
@@ -4608,13 +4808,27 @@ parser_set_encode(rb_parser_state* parser_state, const char *name)
|
|
4608
4808
|
parser_state->enc = enc;
|
4609
4809
|
}
|
4610
4810
|
|
4811
|
+
static void
|
4812
|
+
parser_set_compile_option_flag(rb_parser_state* parser_state,
|
4813
|
+
const char *name, const char *val)
|
4814
|
+
{
|
4815
|
+
// TODO: 2.3
|
4816
|
+
}
|
4817
|
+
|
4818
|
+
static void
|
4819
|
+
parser_set_token_info(rb_parser_state* parser_state,
|
4820
|
+
const char *name, const char *val)
|
4821
|
+
{
|
4822
|
+
// TODO: 2.3
|
4823
|
+
}
|
4824
|
+
|
4611
4825
|
static int
|
4612
4826
|
comment_at_top(rb_parser_state* parser_state)
|
4613
4827
|
{
|
4614
4828
|
const char *p = lex_pbeg, *pend = lex_p - 1;
|
4615
4829
|
if(line_count != (has_shebang ? 2 : 1)) return FALSE;
|
4616
4830
|
while(p < pend) {
|
4617
|
-
if
|
4831
|
+
if(!ISSPACE(*p)) return FALSE;
|
4618
4832
|
p++;
|
4619
4833
|
}
|
4620
4834
|
return TRUE;
|
@@ -4638,14 +4852,14 @@ struct magic_comment {
|
|
4638
4852
|
rb_magic_comment_length_t length;
|
4639
4853
|
};
|
4640
4854
|
|
4641
|
-
static const struct magic_comment magic_comments[] = {
|
4642
|
-
|
4643
|
-
|
4855
|
+
static const struct magic_comment magic_comments[4] = {
|
4856
|
+
{"coding", magic_comment_encoding, parser_encode_length},
|
4857
|
+
{"encoding", magic_comment_encoding, parser_encode_length},
|
4858
|
+
{"frozen_string_literal", parser_set_compile_option_flag},
|
4859
|
+
{"warn_indent", parser_set_token_info},
|
4644
4860
|
};
|
4645
4861
|
|
4646
|
-
static const char *
|
4647
|
-
magic_comment_marker(const char *str, long len)
|
4648
|
-
{
|
4862
|
+
static const char * magic_comment_marker(const char *str, long len) {
|
4649
4863
|
long i = 2;
|
4650
4864
|
|
4651
4865
|
while(i < len) {
|
@@ -4674,9 +4888,8 @@ magic_comment_marker(const char *str, long len)
|
|
4674
4888
|
return 0;
|
4675
4889
|
}
|
4676
4890
|
|
4677
|
-
static int
|
4678
|
-
|
4679
|
-
{
|
4891
|
+
static int parser_magic_comment(rb_parser_state* parser_state, const char *str, long len) {
|
4892
|
+
int indicator = 0;
|
4680
4893
|
VALUE name = 0, val = 0;
|
4681
4894
|
const char *beg, *end, *vbeg, *vend;
|
4682
4895
|
|
@@ -4686,10 +4899,15 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
|
|
4686
4899
|
: (void)((_s) = REF(STR_NEW((_p), (_n)))))
|
4687
4900
|
|
4688
4901
|
if(len <= 7) return FALSE;
|
4689
|
-
|
4690
|
-
if(
|
4691
|
-
|
4692
|
-
|
4902
|
+
|
4903
|
+
if(!!(beg = magic_comment_marker(str, len))) {
|
4904
|
+
if(!(end = magic_comment_marker(beg, str + len - beg))) {
|
4905
|
+
return FALSE;
|
4906
|
+
}
|
4907
|
+
indicator = TRUE;
|
4908
|
+
str = beg;
|
4909
|
+
len = end - beg - 3;
|
4910
|
+
}
|
4693
4911
|
|
4694
4912
|
/* %r"([^\\s\'\":;]+)\\s*:\\s*(\"(?:\\\\.|[^\"])*\"|[^\"\\s;]+)[\\s;]*" */
|
4695
4913
|
while(len > 0) {
|
@@ -4719,7 +4937,10 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
|
|
4719
4937
|
// nothing
|
4720
4938
|
}
|
4721
4939
|
if(!len) break;
|
4722
|
-
if(*str != ':')
|
4940
|
+
if(*str != ':') {
|
4941
|
+
if(!indicator) return FALSE;
|
4942
|
+
continue;
|
4943
|
+
}
|
4723
4944
|
|
4724
4945
|
do str++; while(--len > 0 && ISSPACE(*str));
|
4725
4946
|
if(!len) break;
|
@@ -4743,7 +4964,12 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
|
|
4743
4964
|
}
|
4744
4965
|
vend = str;
|
4745
4966
|
}
|
4746
|
-
|
4967
|
+
if(indicator) {
|
4968
|
+
while(len > 0 && (*str == ';' || ISSPACE(*str))) --len, str++;
|
4969
|
+
} else {
|
4970
|
+
while(len > 0 && (ISSPACE(*str))) --len, str++;
|
4971
|
+
if(len) return FALSE;
|
4972
|
+
}
|
4747
4973
|
|
4748
4974
|
n = end - beg;
|
4749
4975
|
str_copy(name, beg, n);
|
@@ -4752,7 +4978,7 @@ parser_magic_comment(rb_parser_state* parser_state, const char *str, long len)
|
|
4752
4978
|
if(s[i] == '-') s[i] = '_';
|
4753
4979
|
}
|
4754
4980
|
do {
|
4755
|
-
if(strncasecmp(p->name, s, n) == 0) {
|
4981
|
+
if(strncasecmp(p->name, s, n) == 0 && !p->name[n]) {
|
4756
4982
|
n = vend - vbeg;
|
4757
4983
|
if(p->length) {
|
4758
4984
|
n = (*p->length)(parser_state, vbeg, n);
|
@@ -4822,7 +5048,7 @@ parser_prepare(rb_parser_state* parser_state)
|
|
4822
5048
|
case '#':
|
4823
5049
|
if(peek('!')) has_shebang = 1;
|
4824
5050
|
break;
|
4825
|
-
case 0xef:
|
5051
|
+
case 0xef: /* UTF-8 BOM marker */
|
4826
5052
|
if(lex_pend - lex_p >= 2 &&
|
4827
5053
|
(unsigned char)lex_p[0] == 0xbb &&
|
4828
5054
|
(unsigned char)lex_p[1] == 0xbf) {
|
@@ -4841,10 +5067,11 @@ parser_prepare(rb_parser_state* parser_state)
|
|
4841
5067
|
|
4842
5068
|
#define IS_ARG() lex_state_p(EXPR_ARG_ANY)
|
4843
5069
|
#define IS_END() lex_state_p(EXPR_END_ANY)
|
4844
|
-
#define IS_BEG() lex_state_p(EXPR_BEG_ANY)
|
5070
|
+
#define IS_BEG() (lex_state_p(EXPR_BEG_ANY) \
|
5071
|
+
|| lex_state_all_p(EXPR_ARG | EXPR_LABELED))
|
4845
5072
|
#define IS_SPCARG(c) (IS_ARG() && space_seen && !ISSPACE(c))
|
4846
|
-
#define IS_LABEL_POSSIBLE() ((lex_state_p(
|
4847
|
-
|
5073
|
+
#define IS_LABEL_POSSIBLE() ((lex_state_p(EXPR_LABEL | EXPR_ENDFN) && !cmd_state) || \
|
5074
|
+
IS_ARG())
|
4848
5075
|
#define IS_LABEL_SUFFIX(n) (peek_n(':',(n)) && !peek_n(':', (n)+1))
|
4849
5076
|
#define IS_AFTER_OPERATOR() lex_state_p(EXPR_FNAME | EXPR_DOT)
|
4850
5077
|
|
@@ -4857,28 +5084,786 @@ parser_prepare(rb_parser_state* parser_state)
|
|
4857
5084
|
space_seen && !ISSPACE(c) && \
|
4858
5085
|
(ambiguous_operator(op, syn), 0)))
|
4859
5086
|
|
4860
|
-
static
|
4861
|
-
|
5087
|
+
static VALUE
|
5088
|
+
parse_rational(rb_parser_state* parser_state, char *str, int len, int seen_point)
|
4862
5089
|
{
|
4863
|
-
|
4864
|
-
|
4865
|
-
|
4866
|
-
|
4867
|
-
|
4868
|
-
|
4869
|
-
|
5090
|
+
VALUE v;
|
5091
|
+
char *point = &str[seen_point];
|
5092
|
+
size_t fraclen = len-seen_point-1;
|
5093
|
+
memmove(point, point+1, fraclen+1);
|
5094
|
+
v = rb_cstr_to_inum(str, 10, FALSE);
|
5095
|
+
return rb_rational_new(v,
|
5096
|
+
rb_funcall(INT2FIX(10), rb_intern("**"), 1, INT2NUM(fraclen)));
|
5097
|
+
}
|
4870
5098
|
|
4871
|
-
|
4872
|
-
|
4873
|
-
|
4874
|
-
|
4875
|
-
|
4876
|
-
|
4877
|
-
|
4878
|
-
|
4879
|
-
|
4880
|
-
|
4881
|
-
|
5099
|
+
/*
 * Lex a numeric literal whose first character (a sign or a digit) is `c`.
 *
 * Sets the lex state to EXPR_END, collects the digits into the token buffer
 * and returns the token type produced by set_integer_literal() or
 * set_number_literal(). Returns 0 after reporting "numeric literal without
 * digits".
 *
 * Handled forms:
 *   0x / 0b / 0d / 0o prefixes and plain leading-zero octal,
 *   decimal integers, and floats with an optional fraction and exponent.
 * A single '_' is allowed between digits; a dangling '_' (or exponent sign)
 * is reported as "trailing `X' in number". `nondigit` remembers the last
 * such character that has not yet been followed by a digit.
 */
static int parse_numeric(rb_parser_state* parser_state, int c) {
  int is_float, seen_point, seen_e, nondigit;
  int suffix;

  is_float = seen_point = seen_e = nondigit = 0;
  SET_LEX_STATE(EXPR_END);
  newtok();
  if(c == '-' || c == '+') {
    tokadd(c);
    c = nextc();
  }
  if(c == '0') {
#define no_digits() do {yy_error("numeric literal without digits"); return 0;} while(0)
    /* token length before any digits, used to detect "prefix but no digits" */
    int start = toklen();
    c = nextc();
    if(c == 'x' || c == 'X') {
      /* hexadecimal */
      c = nextc();
      if(c != -1 && ISXDIGIT(c)) {
        do {
          if(c == '_') {
            if(nondigit) break;
            nondigit = c;
            continue;
          }
          if(!ISXDIGIT(c)) break;
          nondigit = 0;
          tokadd(c);
        } while((c = nextc()) != -1);
      }
      pushback(c);
      tokfix();
      if(toklen() == start) {
        no_digits();
      } else if(nondigit) {
        goto trailing_uc;
      }
      suffix = number_literal_suffix(NUM_SUFFIX_ALL);
      return set_integer_literal(rb_cstr_to_inum(tok(), 16, FALSE), suffix);
    }
    if(c == 'b' || c == 'B') {
      /* binary */
      c = nextc();
      if(c == '0' || c == '1') {
        do {
          if(c == '_') {
            if(nondigit) break;
            nondigit = c;
            continue;
          }
          if(c != '0' && c != '1') break;
          nondigit = 0;
          tokadd(c);
        } while((c = nextc()) != -1);
      }
      pushback(c);
      tokfix();
      if(toklen() == start) {
        no_digits();
      } else if(nondigit) {
        goto trailing_uc;
      }
      suffix = number_literal_suffix(NUM_SUFFIX_ALL);
      return set_integer_literal(rb_cstr_to_inum(tok(), 2, FALSE), suffix);
    }
    if(c == 'd' || c == 'D') {
      /* decimal */
      c = nextc();
      if(c != -1 && ISDIGIT(c)) {
        do {
          if(c == '_') {
            if(nondigit) break;
            nondigit = c;
            continue;
          }
          if(!ISDIGIT(c)) break;
          nondigit = 0;
          tokadd(c);
        } while((c = nextc()) != -1);
      }
      pushback(c);
      tokfix();
      if(toklen() == start) {
        no_digits();
      } else if(nondigit) {
        goto trailing_uc;
      }
      suffix = number_literal_suffix(NUM_SUFFIX_ALL);
      return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
    }
    if(c == '_') {
      /* 0_0 */
      goto octal_number;
    }
    if(c == 'o' || c == 'O') {
      /* prefixed octal */
      c = nextc();
      if(c == -1 || c == '_' || !ISDIGIT(c)) {
        no_digits();
      }
    }
    if(c >= '0' && c <= '7') {
      /* octal */
    octal_number:
      do {
        if(c == '_') {
          if(nondigit) break;
          nondigit = c;
          continue;
        }
        if(c < '0' || c > '9') break;
        if(c > '7') goto invalid_octal;
        nondigit = 0;
        tokadd(c);
      } while((c = nextc()) != -1);
      if(toklen() > start) {
        pushback(c);
        tokfix();
        if(nondigit) goto trailing_uc;
        suffix = number_literal_suffix(NUM_SUFFIX_ALL);
        return set_integer_literal(rb_cstr_to_inum(tok(), 8, FALSE), suffix);
      }
      if(nondigit) {
        pushback(c);
        goto trailing_uc;
      }
    }
    if(c > '7' && c <= '9') {
    invalid_octal:
      yy_error("Invalid octal digit");
    } else if(c == '.' || c == 'e' || c == 'E') {
      /* "0." / "0e": keep the zero and continue as a decimal number */
      tokadd('0');
    } else {
      /* a lone zero */
      pushback(c);
      suffix = number_literal_suffix(NUM_SUFFIX_ALL);
      return set_integer_literal(INT2FIX(0), suffix);
    }
  }

  for(;;) {
    switch(c) {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      nondigit = 0;
      tokadd(c);
      break;

    case '.':
      if(nondigit) goto trailing_uc;
      if(seen_point || seen_e) {
        goto decode_num;
      } else {
        /* only a '.' followed by a digit belongs to the number */
        int c0 = nextc();
        if(c0 == -1 || !ISDIGIT(c0)) {
          pushback(c0);
          goto decode_num;
        }
        c = c0;
      }
      /* remember where the point sits; parse_rational needs the index */
      seen_point = toklen();
      tokadd('.');
      tokadd(c);
      is_float++;
      nondigit = 0;
      break;

    case 'e':
    case 'E':
      if(nondigit) {
        pushback(c);
        c = nondigit;
        goto decode_num;
      }
      if(seen_e) {
        goto decode_num;
      }
      nondigit = c;
      c = nextc();
      if(c != '-' && c != '+' && !ISDIGIT(c)) {
        /* not an exponent after all */
        pushback(c);
        nondigit = 0;
        goto decode_num;
      }
      tokadd(nondigit);
      seen_e++;
      is_float++;
      tokadd(c);
      /* a sign must still be followed by a digit */
      nondigit = (c == '-' || c == '+') ? c : 0;
      break;

    case '_':   /* `_' in number just ignored */
      if(nondigit) goto decode_num;
      nondigit = c;
      break;

    default:
      goto decode_num;
    }
    c = nextc();
  }

decode_num:
  pushback(c);
  if(nondigit) {
    char tmp[30];
  trailing_uc:
    snprintf(tmp, sizeof(tmp), "trailing `%c' in number", nondigit);
    yy_error(tmp);
  }
  tokfix();
  if(is_float) {
    int type = tFLOAT;
    VALUE v;

    /* after an exponent only the imaginary suffix is accepted */
    suffix = number_literal_suffix(seen_e ? NUM_SUFFIX_I : NUM_SUFFIX_ALL);
    if(suffix & NUM_SUFFIX_R) {
      type = tRATIONAL;
      v = parse_rational(parser_state, tok(), toklen(), seen_point);
    } else {
      double d;

      /*
       * strtod() only sets errno on failure, so clear it first; otherwise
       * a stale ERANGE from an earlier call triggers a bogus warning.
       */
      errno = 0;
      d = strtod(tok(), 0);
      if(errno == ERANGE) {
        rb_warningS("Float %s out of range", tok());
        errno = 0;
      }
      v = rb_float_new(d);
    }
    return set_number_literal(v, type, suffix);
  }

  suffix = number_literal_suffix(NUM_SUFFIX_ALL);
  return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
}
|
5331
|
+
|
5332
|
+
static int parse_qmark(rb_parser_state* parser_state) {
|
5333
|
+
rb_encoding *enc;
|
5334
|
+
int c;
|
5335
|
+
|
5336
|
+
if(IS_END()) {
|
5337
|
+
SET_LEX_STATE(EXPR_VALUE);
|
5338
|
+
return '?';
|
5339
|
+
}
|
5340
|
+
c = nextc();
|
5341
|
+
if(c == -1) {
|
5342
|
+
rb_compile_error(parser_state, "incomplete character syntax");
|
5343
|
+
return 0;
|
5344
|
+
}
|
5345
|
+
if(parser_enc_isspace(c, parser_state->enc)) {
|
5346
|
+
if(!IS_ARG()) {
|
5347
|
+
int c2 = 0;
|
5348
|
+
switch(c) {
|
5349
|
+
case ' ':
|
5350
|
+
c2 = 's';
|
5351
|
+
break;
|
5352
|
+
case '\n':
|
5353
|
+
c2 = 'n';
|
5354
|
+
break;
|
5355
|
+
case '\t':
|
5356
|
+
c2 = 't';
|
5357
|
+
break;
|
5358
|
+
case '\v':
|
5359
|
+
c2 = 'v';
|
5360
|
+
break;
|
5361
|
+
case '\r':
|
5362
|
+
c2 = 'r';
|
5363
|
+
break;
|
5364
|
+
case '\f':
|
5365
|
+
c2 = 'f';
|
5366
|
+
break;
|
5367
|
+
}
|
5368
|
+
if(c2) {
|
5369
|
+
rb_warn("invalid character syntax; use ?\\%c", c2);
|
5370
|
+
}
|
5371
|
+
}
|
5372
|
+
ternary:
|
5373
|
+
pushback(c);
|
5374
|
+
SET_LEX_STATE(EXPR_VALUE);
|
5375
|
+
return '?';
|
5376
|
+
}
|
5377
|
+
newtok();
|
5378
|
+
enc = parser_state->enc;
|
5379
|
+
if(!parser_isascii()) {
|
5380
|
+
if(tokadd_mbchar(c) == -1) return 0;
|
5381
|
+
} else if((parser_enc_isalnum(c, parser_state->enc) || c == '_') &&
|
5382
|
+
lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser_state->enc)) {
|
5383
|
+
goto ternary;
|
5384
|
+
} else if(c == '\\') {
|
5385
|
+
if(peek('u')) {
|
5386
|
+
nextc();
|
5387
|
+
c = parser_tokadd_utf8(parser_state, &enc, 0, 0, 0);
|
5388
|
+
if(0x80 <= c) {
|
5389
|
+
tokaddmbc(c, enc);
|
5390
|
+
} else {
|
5391
|
+
tokadd(c);
|
5392
|
+
}
|
5393
|
+
} else if(!lex_eol_p() && !(c = *lex_p, ISASCII(c))) {
|
5394
|
+
nextc();
|
5395
|
+
if(tokadd_mbchar(c) == -1) return 0;
|
5396
|
+
} else {
|
5397
|
+
c = read_escape(0, &enc);
|
5398
|
+
tokadd(c);
|
5399
|
+
}
|
5400
|
+
} else {
|
5401
|
+
tokadd(c);
|
5402
|
+
}
|
5403
|
+
tokfix();
|
5404
|
+
set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0));
|
5405
|
+
SET_LEX_STATE(EXPR_END);
|
5406
|
+
return tCHAR;
|
5407
|
+
}
|
5408
|
+
|
5409
|
+
static int
|
5410
|
+
parse_percent(rb_parser_state* parser_state, const int space_seen,
|
5411
|
+
const enum lex_state_e last_state)
|
5412
|
+
{
|
5413
|
+
int c;
|
5414
|
+
|
5415
|
+
if(IS_BEG()) {
|
5416
|
+
int term;
|
5417
|
+
int paren;
|
5418
|
+
|
5419
|
+
c = nextc();
|
5420
|
+
quotation:
|
5421
|
+
if(c == -1 || !ISALNUM(c)) {
|
5422
|
+
term = c;
|
5423
|
+
c = 'Q';
|
5424
|
+
} else {
|
5425
|
+
term = nextc();
|
5426
|
+
if(parser_enc_isalnum((int)term, parser_state->enc) || !parser_isascii()) {
|
5427
|
+
yy_error("unknown type of %string");
|
5428
|
+
return 0;
|
5429
|
+
}
|
5430
|
+
}
|
5431
|
+
if(c == -1 || term == -1) {
|
5432
|
+
rb_compile_error(parser_state, "unterminated quoted string meets end of file");
|
5433
|
+
return 0;
|
5434
|
+
}
|
5435
|
+
paren = term;
|
5436
|
+
if(term == '(') term = ')';
|
5437
|
+
else if(term == '[') term = ']';
|
5438
|
+
else if(term == '{') term = '}';
|
5439
|
+
else if(term == '<') term = '>';
|
5440
|
+
else paren = 0;
|
5441
|
+
|
5442
|
+
switch(c) {
|
5443
|
+
case 'Q':
|
5444
|
+
lex_strterm = NEW_STRTERM(str_dquote, term, paren);
|
5445
|
+
return tSTRING_BEG;
|
5446
|
+
|
5447
|
+
case 'q':
|
5448
|
+
lex_strterm = NEW_STRTERM(str_squote, term, paren);
|
5449
|
+
return tSTRING_BEG;
|
5450
|
+
|
5451
|
+
case 'W':
|
5452
|
+
lex_strterm = NEW_STRTERM(str_dword, term, paren);
|
5453
|
+
do {c = nextc();} while(ISSPACE(c));
|
5454
|
+
pushback(c);
|
5455
|
+
return tWORDS_BEG;
|
5456
|
+
|
5457
|
+
case 'w':
|
5458
|
+
lex_strterm = NEW_STRTERM(str_sword, term, paren);
|
5459
|
+
do {c = nextc();} while(ISSPACE(c));
|
5460
|
+
pushback(c);
|
5461
|
+
return tQWORDS_BEG;
|
5462
|
+
|
5463
|
+
case 'I':
|
5464
|
+
lex_strterm = NEW_STRTERM(str_dword, term, paren);
|
5465
|
+
do {c = nextc();} while(ISSPACE(c));
|
5466
|
+
pushback(c);
|
5467
|
+
return tSYMBOLS_BEG;
|
5468
|
+
|
5469
|
+
case 'i':
|
5470
|
+
lex_strterm = NEW_STRTERM(str_sword, term, paren);
|
5471
|
+
do {c = nextc();} while(ISSPACE(c));
|
5472
|
+
pushback(c);
|
5473
|
+
return tQSYMBOLS_BEG;
|
5474
|
+
|
5475
|
+
case 'x':
|
5476
|
+
lex_strterm = NEW_STRTERM(str_xquote, term, paren);
|
5477
|
+
return tXSTRING_BEG;
|
5478
|
+
|
5479
|
+
case 'r':
|
5480
|
+
lex_strterm = NEW_STRTERM(str_regexp, term, paren);
|
5481
|
+
return tREGEXP_BEG;
|
5482
|
+
|
5483
|
+
case 's':
|
5484
|
+
lex_strterm = NEW_STRTERM(str_ssym, term, paren);
|
5485
|
+
SET_LEX_STATE(EXPR_FNAME | EXPR_FITEM);
|
5486
|
+
return tSYMBEG;
|
5487
|
+
|
5488
|
+
default:
|
5489
|
+
yy_error("unknown type of %string");
|
5490
|
+
return 0;
|
5491
|
+
}
|
5492
|
+
}
|
5493
|
+
if((c = nextc()) == '=') {
|
5494
|
+
set_yylval_id('%');
|
5495
|
+
SET_LEX_STATE(EXPR_BEG);
|
5496
|
+
return tOP_ASGN;
|
5497
|
+
}
|
5498
|
+
if(IS_SPCARG(c) || (lex_state_p(EXPR_FITEM) && c == 's')) {
|
5499
|
+
goto quotation;
|
5500
|
+
}
|
5501
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
5502
|
+
pushback(c);
|
5503
|
+
warn_balanced("%%", "string literal");
|
5504
|
+
return '%';
|
5505
|
+
}
|
5506
|
+
|
5507
|
+
static int tokadd_ident(rb_parser_state* parser_state, int c) {
|
5508
|
+
do {
|
5509
|
+
if(tokadd_mbchar(c) == -1) return -1;
|
5510
|
+
c = nextc();
|
5511
|
+
} while(parser_is_identchar());
|
5512
|
+
pushback(c);
|
5513
|
+
return 0;
|
5514
|
+
}
|
5515
|
+
|
5516
|
+
static ID tokenize_ident(rb_parser_state* parser_state, const enum lex_state_e last_state) {
|
5517
|
+
ID ident = TOK_INTERN();
|
5518
|
+
|
5519
|
+
set_yylval_name(ident);
|
5520
|
+
|
5521
|
+
return ident;
|
5522
|
+
}
|
5523
|
+
|
5524
|
+
const signed char ruby_digit36_to_number_table[] = {
|
5525
|
+
/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
|
5526
|
+
/*0*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5527
|
+
/*1*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5528
|
+
/*2*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5529
|
+
/*3*/ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
|
5530
|
+
/*4*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
|
5531
|
+
/*5*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
|
5532
|
+
/*6*/ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
|
5533
|
+
/*7*/ 25,26,27,28,29,30,31,32,33,34,35,-1,-1,-1,-1,-1,
|
5534
|
+
/*8*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5535
|
+
/*9*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5536
|
+
/*a*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5537
|
+
/*b*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5538
|
+
/*c*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5539
|
+
/*d*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5540
|
+
/*e*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5541
|
+
/*f*/ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
5542
|
+
};
|
5543
|
+
|
5544
|
+
unsigned long
|
5545
|
+
ruby_scan_digits(const char *str, ssize_t len, int base, size_t *retlen, int *overflow)
|
5546
|
+
{
|
5547
|
+
const char *start = str;
|
5548
|
+
unsigned long ret = 0, x;
|
5549
|
+
unsigned long mul_overflow = (~(unsigned long)0) / base;
|
5550
|
+
|
5551
|
+
*overflow = 0;
|
5552
|
+
|
5553
|
+
if(!len) {
|
5554
|
+
*retlen = 0;
|
5555
|
+
return 0;
|
5556
|
+
}
|
5557
|
+
|
5558
|
+
do {
|
5559
|
+
int d = ruby_digit36_to_number_table[(unsigned char)*str++];
|
5560
|
+
if(d == -1 || base <= d) {
|
5561
|
+
--str;
|
5562
|
+
break;
|
5563
|
+
}
|
5564
|
+
if(mul_overflow < ret) {
|
5565
|
+
*overflow = 1;
|
5566
|
+
}
|
5567
|
+
ret *= base;
|
5568
|
+
x = ret;
|
5569
|
+
ret += d;
|
5570
|
+
if(ret < x) {
|
5571
|
+
*overflow = 1;
|
5572
|
+
}
|
5573
|
+
} while(len < 0 || --len);
|
5574
|
+
|
5575
|
+
*retlen = str - start;
|
5576
|
+
return ret;
|
5577
|
+
}
|
5578
|
+
|
5579
|
+
static int parse_numvar(rb_parser_state* parser_state) {
|
5580
|
+
size_t len;
|
5581
|
+
int overflow;
|
5582
|
+
unsigned long n = ruby_scan_digits(tok()+1, toklen()-1, 10, &len, &overflow);
|
5583
|
+
const unsigned long nth_ref_max =
|
5584
|
+
((FIXNUM_MAX < INT_MAX) ? FIXNUM_MAX : INT_MAX) >> 1;
|
5585
|
+
/* NTH_REF is left-shifted to be ORed with back-ref flag and
|
5586
|
+
* turned into a Fixnum, in compile.c */
|
5587
|
+
|
5588
|
+
if(overflow || n > nth_ref_max) {
|
5589
|
+
/* compile_error()? */
|
5590
|
+
rb_warnS("`%s' is too big for a number variable, always nil", tok());
|
5591
|
+
return 0; /* $0 is $PROGRAM_NAME, not NTH_REF */
|
5592
|
+
} else {
|
5593
|
+
return (int)n;
|
5594
|
+
}
|
5595
|
+
}
|
5596
|
+
|
5597
|
+
static int parse_gvar(rb_parser_state* parser_state, const enum lex_state_e last_state) {
|
5598
|
+
int c;
|
5599
|
+
|
5600
|
+
SET_LEX_STATE(EXPR_END);
|
5601
|
+
newtok();
|
5602
|
+
c = nextc();
|
5603
|
+
switch(c) {
|
5604
|
+
case '_': /* $_: last read line string */
|
5605
|
+
c = nextc();
|
5606
|
+
if(parser_is_identchar()) {
|
5607
|
+
tokadd('$');
|
5608
|
+
tokadd('_');
|
5609
|
+
break;
|
5610
|
+
}
|
5611
|
+
pushback(c);
|
5612
|
+
c = '_';
|
5613
|
+
/* fall through */
|
5614
|
+
case '~': /* $~: match-data */
|
5615
|
+
case '*': /* $*: argv */
|
5616
|
+
case '$': /* $$: pid */
|
5617
|
+
case '?': /* $?: last status */
|
5618
|
+
case '!': /* $!: error string */
|
5619
|
+
case '@': /* $@: error position */
|
5620
|
+
case '/': /* $/: input record separator */
|
5621
|
+
case '\\': /* $\: output record separator */
|
5622
|
+
case ';': /* $;: field separator */
|
5623
|
+
case ',': /* $,: output field separator */
|
5624
|
+
case '.': /* $.: last read line number */
|
5625
|
+
case '=': /* $=: ignorecase */
|
5626
|
+
case ':': /* $:: load path */
|
5627
|
+
case '<': /* $<: reading filename */
|
5628
|
+
case '>': /* $>: default output handle */
|
5629
|
+
case '\"': /* $": already loaded files */
|
5630
|
+
tokadd('$');
|
5631
|
+
tokadd(c);
|
5632
|
+
goto gvar;
|
5633
|
+
|
5634
|
+
case '-':
|
5635
|
+
tokadd('$');
|
5636
|
+
tokadd(c);
|
5637
|
+
c = nextc();
|
5638
|
+
if(parser_is_identchar()) {
|
5639
|
+
if(tokadd_mbchar(c) == -1) return 0;
|
5640
|
+
} else {
|
5641
|
+
pushback(c);
|
5642
|
+
pushback('-');
|
5643
|
+
return '$';
|
5644
|
+
}
|
5645
|
+
gvar:
|
5646
|
+
tokfix();
|
5647
|
+
set_yylval_name(TOK_INTERN());
|
5648
|
+
return tGVAR;
|
5649
|
+
|
5650
|
+
case '&': /* $&: last match */
|
5651
|
+
case '`': /* $`: string before last match */
|
5652
|
+
case '\'': /* $': string after last match */
|
5653
|
+
case '+': /* $+: string matches last paren. */
|
5654
|
+
if(lex_state_of_p(last_state, EXPR_FNAME)) {
|
5655
|
+
tokadd('$');
|
5656
|
+
tokadd(c);
|
5657
|
+
goto gvar;
|
5658
|
+
}
|
5659
|
+
set_yylval_node(NEW_BACK_REF(c));
|
5660
|
+
return tBACK_REF;
|
5661
|
+
|
5662
|
+
case '1': case '2': case '3':
|
5663
|
+
case '4': case '5': case '6':
|
5664
|
+
case '7': case '8': case '9':
|
5665
|
+
tokadd('$');
|
5666
|
+
do {
|
5667
|
+
tokadd(c);
|
5668
|
+
c = nextc();
|
5669
|
+
} while(c != -1 && ISDIGIT(c));
|
5670
|
+
pushback(c);
|
5671
|
+
if(lex_state_of_p(last_state, EXPR_FNAME)) goto gvar;
|
5672
|
+
tokfix();
|
5673
|
+
set_yylval_node(NEW_NTH_REF(parse_numvar(parser_state)));
|
5674
|
+
return tNTH_REF;
|
5675
|
+
|
5676
|
+
default:
|
5677
|
+
if(!parser_is_identchar()) {
|
5678
|
+
if(c == -1 || ISSPACE(c)) {
|
5679
|
+
rb_compile_error(parser_state,
|
5680
|
+
"`$' without identifiers is not allowed as a global variable name");
|
5681
|
+
} else {
|
5682
|
+
pushback(c);
|
5683
|
+
rb_compile_error(parser_state, "`$%c' is not allowed as a global variable name", c);
|
5684
|
+
}
|
5685
|
+
return 0;
|
5686
|
+
}
|
5687
|
+
case '0':
|
5688
|
+
tokadd('$');
|
5689
|
+
}
|
5690
|
+
|
5691
|
+
if(tokadd_ident(parser_state, c)) return 0;
|
5692
|
+
SET_LEX_STATE(EXPR_END);
|
5693
|
+
tokenize_ident(parser_state, last_state);
|
5694
|
+
return tGVAR;
|
5695
|
+
}
|
5696
|
+
|
5697
|
+
static int
|
5698
|
+
parse_atmark(rb_parser_state* parser_state, const enum lex_state_e last_state)
|
5699
|
+
{
|
5700
|
+
int result = tIVAR;
|
5701
|
+
int c = nextc();
|
5702
|
+
|
5703
|
+
newtok();
|
5704
|
+
tokadd('@');
|
5705
|
+
if(c == '@') {
|
5706
|
+
result = tCVAR;
|
5707
|
+
tokadd('@');
|
5708
|
+
c = nextc();
|
5709
|
+
}
|
5710
|
+
if(c == -1 || ISSPACE(c)) {
|
5711
|
+
if(result == tIVAR) {
|
5712
|
+
rb_compile_error(parser_state,
|
5713
|
+
"`@' without identifiers is not allowed as an instance variable name");
|
5714
|
+
} else {
|
5715
|
+
rb_compile_error(parser_state,
|
5716
|
+
"`@@' without identifiers is not allowed as a class variable name");
|
5717
|
+
}
|
5718
|
+
return 0;
|
5719
|
+
} else if(ISDIGIT(c) || !parser_is_identchar()) {
|
5720
|
+
pushback(c);
|
5721
|
+
if(result == tIVAR) {
|
5722
|
+
rb_compile_error(parser_state,
|
5723
|
+
"`@%c' is not allowed as an instance variable name", c);
|
5724
|
+
} else {
|
5725
|
+
rb_compile_error(parser_state,
|
5726
|
+
"`@@%c' is not allowed as a class variable name", c);
|
5727
|
+
}
|
5728
|
+
return 0;
|
5729
|
+
}
|
5730
|
+
|
5731
|
+
if(tokadd_ident(parser_state, c)) return 0;
|
5732
|
+
SET_LEX_STATE(EXPR_END);
|
5733
|
+
tokenize_ident(parser_state, last_state);
|
5734
|
+
return result;
|
5735
|
+
}
|
5736
|
+
|
5737
|
+
static int parse_ident(rb_parser_state* parser_state, int c, int cmd_state) {
|
5738
|
+
int result = 0;
|
5739
|
+
int mb = ENC_CODERANGE_7BIT;
|
5740
|
+
const enum lex_state_e last_state = lex_state;
|
5741
|
+
ID ident;
|
5742
|
+
|
5743
|
+
do {
|
5744
|
+
if(!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
|
5745
|
+
if(tokadd_mbchar(c) == -1) return 0;
|
5746
|
+
c = nextc();
|
5747
|
+
} while(parser_is_identchar());
|
5748
|
+
if((c == '!' || c == '?') && !peek('=')) {
|
5749
|
+
tokadd(c);
|
5750
|
+
} else {
|
5751
|
+
pushback(c);
|
5752
|
+
}
|
5753
|
+
tokfix();
|
5754
|
+
|
5755
|
+
if(toklast() == '!' || toklast() == '?') {
|
5756
|
+
result = tFID;
|
5757
|
+
} else {
|
5758
|
+
if(lex_state_p(EXPR_FNAME)) {
|
5759
|
+
int c = nextc();
|
5760
|
+
if(c == '=' && !peek('~') && !peek('>') &&
|
5761
|
+
(!peek('=') || (peek_n('>', 1)))) {
|
5762
|
+
result = tIDENTIFIER;
|
5763
|
+
tokadd(c);
|
5764
|
+
tokfix();
|
5765
|
+
} else {
|
5766
|
+
pushback(c);
|
5767
|
+
}
|
5768
|
+
}
|
5769
|
+
if(result == 0 && ISUPPER(tok()[0])) {
|
5770
|
+
result = tCONSTANT;
|
5771
|
+
} else {
|
5772
|
+
result = tIDENTIFIER;
|
5773
|
+
}
|
5774
|
+
}
|
5775
|
+
|
5776
|
+
if(IS_LABEL_POSSIBLE()) {
|
5777
|
+
if(IS_LABEL_SUFFIX(0)) {
|
5778
|
+
SET_LEX_STATE(EXPR_ARG | EXPR_LABELED);
|
5779
|
+
nextc();
|
5780
|
+
set_yylval_name(TOK_INTERN());
|
5781
|
+
return tLABEL;
|
5782
|
+
}
|
5783
|
+
}
|
5784
|
+
if(mb == ENC_CODERANGE_7BIT && !lex_state_p(EXPR_DOT)) {
|
5785
|
+
const struct kwtable *kw;
|
5786
|
+
|
5787
|
+
/* See if it is a reserved word. */
|
5788
|
+
kw = rb_reserved_word(tok(), toklen());
|
5789
|
+
if(kw) {
|
5790
|
+
enum lex_state_e state = lex_state;
|
5791
|
+
SET_LEX_STATE(kw->state);
|
5792
|
+
if(lex_state_of_p(state, EXPR_FNAME)) {
|
5793
|
+
set_yylval_name(parser_intern2(tok(), toklen()));
|
5794
|
+
return kw->id[0];
|
5795
|
+
}
|
5796
|
+
if(lex_state_p(EXPR_BEG)) {
|
5797
|
+
command_start = TRUE;
|
5798
|
+
}
|
5799
|
+
if(kw->id[0] == keyword_do) {
|
5800
|
+
if(lpar_beg && lpar_beg == paren_nest) {
|
5801
|
+
lpar_beg = 0;
|
5802
|
+
--paren_nest;
|
5803
|
+
return keyword_do_LAMBDA;
|
5804
|
+
}
|
5805
|
+
if(COND_P()) return keyword_do_cond;
|
5806
|
+
if(CMDARG_P() && !lex_state_of_p(state, EXPR_CMDARG)) {
|
5807
|
+
return keyword_do_block;
|
5808
|
+
}
|
5809
|
+
if(lex_state_of_p(state, (EXPR_BEG | EXPR_ENDARG))) {
|
5810
|
+
return keyword_do_block;
|
5811
|
+
}
|
5812
|
+
return keyword_do;
|
5813
|
+
}
|
5814
|
+
if(lex_state_of_p(state, (EXPR_BEG | EXPR_LABELED))) {
|
5815
|
+
return kw->id[0];
|
5816
|
+
} else {
|
5817
|
+
if(kw->id[0] != kw->id[1]) {
|
5818
|
+
SET_LEX_STATE(EXPR_BEG | EXPR_LABEL);
|
5819
|
+
}
|
5820
|
+
return kw->id[1];
|
5821
|
+
}
|
5822
|
+
}
|
5823
|
+
}
|
5824
|
+
|
5825
|
+
if(lex_state_p(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) {
|
5826
|
+
if(cmd_state) {
|
5827
|
+
SET_LEX_STATE(EXPR_CMDARG);
|
5828
|
+
} else {
|
5829
|
+
SET_LEX_STATE(EXPR_ARG);
|
5830
|
+
}
|
5831
|
+
} else if(lex_state == EXPR_FNAME) {
|
5832
|
+
SET_LEX_STATE(EXPR_ENDFN);
|
5833
|
+
} else {
|
5834
|
+
SET_LEX_STATE(EXPR_END);
|
5835
|
+
}
|
5836
|
+
|
5837
|
+
ident = tokenize_ident(parser_state, last_state);
|
5838
|
+
if(!lex_state_of_p(last_state, EXPR_DOT | EXPR_FNAME) &&
|
5839
|
+
(result == tIDENTIFIER) && /* not EXPR_FNAME, not attrasgn */
|
5840
|
+
lvar_defined(ident)) {
|
5841
|
+
SET_LEX_STATE(EXPR_END | EXPR_LABEL);
|
5842
|
+
}
|
5843
|
+
|
5844
|
+
return result;
|
5845
|
+
}
|
5846
|
+
|
5847
|
+
static int parser_yylex(rb_parser_state* parser_state) {
|
5848
|
+
int c;
|
5849
|
+
int space_seen = 0;
|
5850
|
+
int cmd_state;
|
5851
|
+
int label;
|
5852
|
+
enum lex_state_e last_state;
|
5853
|
+
int fallthru = FALSE;
|
5854
|
+
int tok_seen = token_seen;
|
5855
|
+
|
5856
|
+
if(lex_strterm) {
|
5857
|
+
int token;
|
5858
|
+
if(nd_type(lex_strterm) == NODE_HEREDOC) {
|
5859
|
+
token = here_document(lex_strterm);
|
5860
|
+
if(token == tSTRING_END) {
|
5861
|
+
lex_strterm = 0;
|
5862
|
+
SET_LEX_STATE(EXPR_END);
|
5863
|
+
}
|
5864
|
+
} else {
|
5865
|
+
token = parse_string(lex_strterm);
|
5866
|
+
if(token == tSTRING_END && (lex_strterm->nd_func & STR_FUNC_LABEL)) {
|
4882
5867
|
if(((lex_state_p(EXPR_BEG | EXPR_ENDFN) && !COND_P()) || IS_ARG()) && IS_LABEL_SUFFIX(0)) {
|
4883
5868
|
nextc();
|
4884
5869
|
token = tLABEL_END;
|
@@ -4886,7 +5871,7 @@ parser_yylex(rb_parser_state *parser_state)
|
|
4886
5871
|
}
|
4887
5872
|
if(token == tSTRING_END || token == tREGEXP_END || token == tLABEL_END) {
|
4888
5873
|
lex_strterm = 0;
|
4889
|
-
|
5874
|
+
SET_LEX_STATE(token == tLABEL_END ? EXPR_BEG|EXPR_LABEL : EXPR_END);
|
4890
5875
|
}
|
4891
5876
|
}
|
4892
5877
|
return token;
|
@@ -4894,6 +5879,7 @@ parser_yylex(rb_parser_state *parser_state)
|
|
4894
5879
|
|
4895
5880
|
cmd_state = command_start;
|
4896
5881
|
command_start = FALSE;
|
5882
|
+
token_seen = TRUE;
|
4897
5883
|
retry:
|
4898
5884
|
last_state = lex_state;
|
4899
5885
|
switch(c = nextc()) {
|
@@ -4910,6 +5896,7 @@ retry:
|
|
4910
5896
|
goto retry;
|
4911
5897
|
|
4912
5898
|
case '#': /* it's a comment */
|
5899
|
+
token_seen = tok_seen;
|
4913
5900
|
/* no magic_comment in shebang line */
|
4914
5901
|
if(!parser_magic_comment(parser_state, lex_p, lex_pend - lex_p)) {
|
4915
5902
|
if(comment_at_top(parser_state)) {
|
@@ -4918,9 +5905,17 @@ retry:
|
|
4918
5905
|
}
|
4919
5906
|
|
4920
5907
|
lex_p = lex_pend;
|
5908
|
+
fallthru = TRUE;
|
4921
5909
|
/* fall through */
|
4922
5910
|
case '\n':
|
4923
|
-
|
5911
|
+
token_seen = tok_seen;
|
5912
|
+
c = (lex_state_p(EXPR_BEG | EXPR_CLASS | EXPR_FNAME | EXPR_DOT)
|
5913
|
+
&& !lex_state_p(EXPR_LABELED));
|
5914
|
+
if(c || lex_state_all_p(EXPR_ARG | EXPR_LABELED)) {
|
5915
|
+
fallthru = FALSE;
|
5916
|
+
if(!c && in_kwarg) {
|
5917
|
+
goto normal_newline;
|
5918
|
+
}
|
4924
5919
|
goto retry;
|
4925
5920
|
}
|
4926
5921
|
|
@@ -4930,10 +5925,10 @@ retry:
|
|
4930
5925
|
case '\13': /* '\v' */
|
4931
5926
|
space_seen = 1;
|
4932
5927
|
break;
|
5928
|
+
case '&':
|
4933
5929
|
case '.': {
|
4934
|
-
if((
|
5930
|
+
if(peek('.') == (c == '&')) {
|
4935
5931
|
pushback(c);
|
4936
|
-
pushback('.');
|
4937
5932
|
goto retry;
|
4938
5933
|
}
|
4939
5934
|
}
|
@@ -4948,21 +5943,21 @@ retry:
|
|
4948
5943
|
|
4949
5944
|
normal_newline:
|
4950
5945
|
command_start = TRUE;
|
4951
|
-
|
5946
|
+
SET_LEX_STATE(EXPR_BEG);
|
4952
5947
|
return '\n';
|
4953
5948
|
|
4954
5949
|
case '*':
|
4955
5950
|
if((c = nextc()) == '*') {
|
4956
5951
|
if((c = nextc()) == '=') {
|
4957
5952
|
set_yylval_id(tPOW);
|
4958
|
-
|
5953
|
+
SET_LEX_STATE(EXPR_BEG);
|
4959
5954
|
return tOP_ASGN;
|
4960
5955
|
}
|
4961
5956
|
pushback(c);
|
4962
|
-
if
|
5957
|
+
if(IS_SPCARG(c)) {
|
4963
5958
|
rb_warning0("`**' interpreted as argument prefix");
|
4964
5959
|
c = tDSTAR;
|
4965
|
-
} else if
|
5960
|
+
} else if(IS_BEG()) {
|
4966
5961
|
c = tDSTAR;
|
4967
5962
|
} else {
|
4968
5963
|
warn_balanced("**", "argument prefix");
|
@@ -4971,7 +5966,7 @@ retry:
|
|
4971
5966
|
} else {
|
4972
5967
|
if(c == '=') {
|
4973
5968
|
set_yylval_id('*');
|
4974
|
-
|
5969
|
+
SET_LEX_STATE(EXPR_BEG);
|
4975
5970
|
return tOP_ASGN;
|
4976
5971
|
}
|
4977
5972
|
pushback(c);
|
@@ -4985,18 +5980,18 @@ retry:
|
|
4985
5980
|
c = '*';
|
4986
5981
|
}
|
4987
5982
|
}
|
4988
|
-
|
5983
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
4989
5984
|
return c;
|
4990
5985
|
|
4991
5986
|
case '!':
|
4992
5987
|
c = nextc();
|
4993
5988
|
if(IS_AFTER_OPERATOR()) {
|
4994
|
-
|
5989
|
+
SET_LEX_STATE(EXPR_ARG);
|
4995
5990
|
if(c == '@') {
|
4996
5991
|
return '!';
|
4997
5992
|
}
|
4998
5993
|
} else {
|
4999
|
-
|
5994
|
+
SET_LEX_STATE(EXPR_BEG);
|
5000
5995
|
}
|
5001
5996
|
if(c == '=') {
|
5002
5997
|
return tNEQ;
|
@@ -5011,6 +6006,7 @@ retry:
|
|
5011
6006
|
if(was_bol()) {
|
5012
6007
|
/* skip embedded rd document */
|
5013
6008
|
if(strncmp(lex_p, "begin", 5) == 0 && ISSPACE(lex_p[5])) {
|
6009
|
+
lex_goto_eol(parser_state);
|
5014
6010
|
for(;;) {
|
5015
6011
|
lex_goto_eol(parser_state);
|
5016
6012
|
c = nextc();
|
@@ -5019,7 +6015,7 @@ retry:
|
|
5019
6015
|
return 0;
|
5020
6016
|
}
|
5021
6017
|
if(c != '=') continue;
|
5022
|
-
if(strncmp(lex_p, "end", 3) == 0 &&
|
6018
|
+
if(c == '=' && strncmp(lex_p, "end", 3) == 0 &&
|
5023
6019
|
(lex_p + 3 == lex_pend || ISSPACE(lex_p[3]))) {
|
5024
6020
|
break;
|
5025
6021
|
}
|
@@ -5029,7 +6025,7 @@ retry:
|
|
5029
6025
|
}
|
5030
6026
|
}
|
5031
6027
|
|
5032
|
-
|
6028
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
5033
6029
|
if((c = nextc()) == '=') {
|
5034
6030
|
if((c = nextc()) == '=') {
|
5035
6031
|
return tEQQ;
|
@@ -5051,17 +6047,17 @@ retry:
|
|
5051
6047
|
if(c == '<' &&
|
5052
6048
|
!lex_state_p(EXPR_DOT | EXPR_CLASS) &&
|
5053
6049
|
!IS_END() &&
|
5054
|
-
(!IS_ARG() || space_seen)) {
|
6050
|
+
(!IS_ARG() || lex_state_p(EXPR_LABELED) || space_seen)) {
|
5055
6051
|
int token = heredoc_identifier();
|
5056
6052
|
if(token) return token;
|
5057
6053
|
}
|
5058
6054
|
if(IS_AFTER_OPERATOR()) {
|
5059
|
-
|
6055
|
+
SET_LEX_STATE(EXPR_ARG);
|
5060
6056
|
} else {
|
5061
6057
|
if(lex_state_p(EXPR_CLASS)) {
|
5062
6058
|
command_start = TRUE;
|
5063
6059
|
}
|
5064
|
-
|
6060
|
+
SET_LEX_STATE(EXPR_BEG);
|
5065
6061
|
}
|
5066
6062
|
if(c == '=') {
|
5067
6063
|
if((c = nextc()) == '>') {
|
@@ -5073,7 +6069,7 @@ retry:
|
|
5073
6069
|
if(c == '<') {
|
5074
6070
|
if((c = nextc()) == '=') {
|
5075
6071
|
set_yylval_id(tLSHFT);
|
5076
|
-
|
6072
|
+
SET_LEX_STATE(EXPR_BEG);
|
5077
6073
|
return tOP_ASGN;
|
5078
6074
|
}
|
5079
6075
|
pushback(c);
|
@@ -5084,14 +6080,14 @@ retry:
|
|
5084
6080
|
return '<';
|
5085
6081
|
|
5086
6082
|
case '>':
|
5087
|
-
|
6083
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
5088
6084
|
if((c = nextc()) == '=') {
|
5089
6085
|
return tGEQ;
|
5090
6086
|
}
|
5091
6087
|
if(c == '>') {
|
5092
6088
|
if((c = nextc()) == '=') {
|
5093
6089
|
set_yylval_id(tRSHFT);
|
5094
|
-
|
6090
|
+
SET_LEX_STATE(EXPR_BEG);
|
5095
6091
|
return tOP_ASGN;
|
5096
6092
|
}
|
5097
6093
|
pushback(c);
|
@@ -5107,14 +6103,14 @@ retry:
|
|
5107
6103
|
|
5108
6104
|
case '`':
|
5109
6105
|
if(lex_state_p(EXPR_FNAME)) {
|
5110
|
-
|
6106
|
+
SET_LEX_STATE(EXPR_ENDFN);
|
5111
6107
|
return c;
|
5112
6108
|
}
|
5113
6109
|
if(lex_state_p(EXPR_DOT)) {
|
5114
6110
|
if(cmd_state) {
|
5115
|
-
|
6111
|
+
SET_LEX_STATE(EXPR_CMDARG);
|
5116
6112
|
} else {
|
5117
|
-
|
6113
|
+
SET_LEX_STATE(EXPR_ARG);
|
5118
6114
|
}
|
5119
6115
|
return c;
|
5120
6116
|
}
|
@@ -5127,93 +6123,25 @@ retry:
|
|
5127
6123
|
return tSTRING_BEG;
|
5128
6124
|
|
5129
6125
|
case '?':
|
5130
|
-
|
5131
|
-
lex_state = EXPR_VALUE;
|
5132
|
-
return '?';
|
5133
|
-
}
|
5134
|
-
c = nextc();
|
5135
|
-
if(c == -1) {
|
5136
|
-
rb_compile_error(parser_state, "incomplete character syntax");
|
5137
|
-
return 0;
|
5138
|
-
}
|
5139
|
-
if(parser_enc_isspace(c, parser_state->enc)) {
|
5140
|
-
if(!IS_ARG()){
|
5141
|
-
int c2 = 0;
|
5142
|
-
switch(c) {
|
5143
|
-
case ' ':
|
5144
|
-
c2 = 's';
|
5145
|
-
break;
|
5146
|
-
case '\n':
|
5147
|
-
c2 = 'n';
|
5148
|
-
break;
|
5149
|
-
case '\t':
|
5150
|
-
c2 = 't';
|
5151
|
-
break;
|
5152
|
-
case '\v':
|
5153
|
-
c2 = 'v';
|
5154
|
-
break;
|
5155
|
-
case '\r':
|
5156
|
-
c2 = 'r';
|
5157
|
-
break;
|
5158
|
-
case '\f':
|
5159
|
-
c2 = 'f';
|
5160
|
-
break;
|
5161
|
-
}
|
5162
|
-
if(c2) {
|
5163
|
-
rb_warn("invalid character syntax; use ?\\%c", c2);
|
5164
|
-
}
|
5165
|
-
}
|
5166
|
-
ternary:
|
5167
|
-
pushback(c);
|
5168
|
-
lex_state = EXPR_VALUE;
|
5169
|
-
return '?';
|
5170
|
-
}
|
5171
|
-
|
5172
|
-
newtok();
|
5173
|
-
enc = parser_state->enc;
|
5174
|
-
if(!parser_isascii()) {
|
5175
|
-
if(tokadd_mbchar(c) == -1) return 0;
|
5176
|
-
} else if((parser_enc_isalnum(c, parser_state->enc) || c == '_') &&
|
5177
|
-
lex_p < lex_pend && is_identchar(lex_p, lex_pend, parser_state->enc)) {
|
5178
|
-
goto ternary;
|
5179
|
-
} else if(c == '\\') {
|
5180
|
-
if(peek('u')) {
|
5181
|
-
nextc();
|
5182
|
-
c = parser_tokadd_utf8(parser_state, &enc, 0, 0, 0);
|
5183
|
-
if(0x80 <= c) {
|
5184
|
-
tokaddmbc(c, enc);
|
5185
|
-
} else {
|
5186
|
-
tokadd(c);
|
5187
|
-
}
|
5188
|
-
} else if(!lex_eol_p() && !(c = *lex_p, ISASCII(c))) {
|
5189
|
-
nextc();
|
5190
|
-
if(tokadd_mbchar(c) == -1) return 0;
|
5191
|
-
} else {
|
5192
|
-
c = read_escape(0, &enc);
|
5193
|
-
tokadd(c);
|
5194
|
-
}
|
5195
|
-
} else {
|
5196
|
-
tokadd(c);
|
5197
|
-
}
|
5198
|
-
tokfix();
|
5199
|
-
set_yylval_str(STR_NEW3(tok(), toklen(), enc, 0));
|
5200
|
-
lex_state = EXPR_END;
|
5201
|
-
return tCHAR;
|
6126
|
+
return parse_qmark(parser_state);
|
5202
6127
|
|
5203
6128
|
case '&':
|
5204
6129
|
if((c = nextc()) == '&') {
|
5205
|
-
|
6130
|
+
SET_LEX_STATE(EXPR_BEG);
|
5206
6131
|
if((c = nextc()) == '=') {
|
5207
6132
|
set_yylval_id(tANDOP);
|
5208
|
-
|
6133
|
+
SET_LEX_STATE(EXPR_BEG);
|
5209
6134
|
return tOP_ASGN;
|
5210
6135
|
}
|
5211
6136
|
pushback(c);
|
5212
6137
|
return tANDOP;
|
5213
6138
|
} else if(c == '=') {
|
5214
6139
|
set_yylval_id('&');
|
5215
|
-
|
6140
|
+
SET_LEX_STATE(EXPR_BEG);
|
5216
6141
|
return tOP_ASGN;
|
6142
|
+
} else if(c == '.') {
|
6143
|
+
SET_LEX_STATE(EXPR_DOT);
|
6144
|
+
return tANDDOT;
|
5217
6145
|
}
|
5218
6146
|
pushback(c);
|
5219
6147
|
if(IS_SPCARG(c)){
|
@@ -5225,15 +6153,15 @@ retry:
|
|
5225
6153
|
warn_balanced("&", "argument prefix");
|
5226
6154
|
c = '&';
|
5227
6155
|
}
|
5228
|
-
|
6156
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
5229
6157
|
return c;
|
5230
6158
|
|
5231
6159
|
case '|':
|
5232
6160
|
if((c = nextc()) == '|') {
|
5233
|
-
|
6161
|
+
SET_LEX_STATE(EXPR_BEG);
|
5234
6162
|
if((c = nextc()) == '=') {
|
5235
6163
|
set_yylval_id(tOROP);
|
5236
|
-
|
6164
|
+
SET_LEX_STATE(EXPR_BEG);
|
5237
6165
|
return tOP_ASGN;
|
5238
6166
|
}
|
5239
6167
|
pushback(c);
|
@@ -5241,335 +6169,92 @@ retry:
|
|
5241
6169
|
}
|
5242
6170
|
if(c == '=') {
|
5243
6171
|
set_yylval_id('|');
|
5244
|
-
|
6172
|
+
SET_LEX_STATE(EXPR_BEG);
|
5245
6173
|
return tOP_ASGN;
|
5246
6174
|
}
|
5247
|
-
|
6175
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG|EXPR_LABEL);
|
5248
6176
|
pushback(c);
|
5249
6177
|
return '|';
|
5250
6178
|
|
5251
|
-
case '+':
|
5252
|
-
c = nextc();
|
5253
|
-
if(IS_AFTER_OPERATOR()) {
|
5254
|
-
|
5255
|
-
if(c == '@') {
|
5256
|
-
return tUPLUS;
|
5257
|
-
}
|
5258
|
-
pushback(c);
|
5259
|
-
return '+';
|
5260
|
-
}
|
5261
|
-
if(c == '=') {
|
5262
|
-
set_yylval_id('+');
|
5263
|
-
lex_state = EXPR_BEG;
|
5264
|
-
return tOP_ASGN;
|
5265
|
-
}
|
5266
|
-
if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) {
|
5267
|
-
lex_state = EXPR_BEG;
|
5268
|
-
pushback(c);
|
5269
|
-
if(c != -1 && ISDIGIT(c)) {
|
5270
|
-
c = '+';
|
5271
|
-
goto start_num;
|
5272
|
-
}
|
5273
|
-
return tUPLUS;
|
5274
|
-
}
|
5275
|
-
lex_state = EXPR_BEG;
|
5276
|
-
pushback(c);
|
5277
|
-
warn_balanced("+", "unary operator");
|
5278
|
-
return '+';
|
5279
|
-
|
5280
|
-
case '-':
|
5281
|
-
c = nextc();
|
5282
|
-
if(IS_AFTER_OPERATOR()) {
|
5283
|
-
lex_state = EXPR_ARG;
|
5284
|
-
if(c == '@') {
|
5285
|
-
return tUMINUS;
|
5286
|
-
}
|
5287
|
-
pushback(c);
|
5288
|
-
return '-';
|
5289
|
-
}
|
5290
|
-
if(c == '=') {
|
5291
|
-
set_yylval_id('-');
|
5292
|
-
lex_state = EXPR_BEG;
|
5293
|
-
return tOP_ASGN;
|
5294
|
-
}
|
5295
|
-
if(c == '>') {
|
5296
|
-
lex_state = EXPR_ENDFN;
|
5297
|
-
return tLAMBDA;
|
5298
|
-
}
|
5299
|
-
if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous())) {
|
5300
|
-
lex_state = EXPR_BEG;
|
5301
|
-
pushback(c);
|
5302
|
-
if(c != -1 && ISDIGIT(c)) {
|
5303
|
-
return tUMINUS_NUM;
|
5304
|
-
}
|
5305
|
-
return tUMINUS;
|
5306
|
-
}
|
5307
|
-
lex_state = EXPR_BEG;
|
5308
|
-
pushback(c);
|
5309
|
-
warn_balanced("-", "unary operator");
|
5310
|
-
return '-';
|
5311
|
-
|
5312
|
-
case '.':
|
5313
|
-
lex_state = EXPR_BEG;
|
5314
|
-
if((c = nextc()) == '.') {
|
5315
|
-
if((c = nextc()) == '.') {
|
5316
|
-
return tDOT3;
|
5317
|
-
}
|
5318
|
-
pushback(c);
|
5319
|
-
return tDOT2;
|
5320
|
-
}
|
5321
|
-
pushback(c);
|
5322
|
-
if(c != -1 && ISDIGIT(c)) {
|
5323
|
-
yy_error("no .<digit> floating literal anymore; put 0 before dot");
|
5324
|
-
}
|
5325
|
-
lex_state = EXPR_DOT;
|
5326
|
-
return '.';
|
5327
|
-
|
5328
|
-
start_num:
|
5329
|
-
case '0': case '1': case '2': case '3': case '4':
|
5330
|
-
case '5': case '6': case '7': case '8': case '9':
|
5331
|
-
{
|
5332
|
-
int is_float, seen_point, seen_e, nondigit, suffix;
|
5333
|
-
|
5334
|
-
is_float = seen_point = seen_e = nondigit = 0;
|
5335
|
-
lex_state = EXPR_END;
|
5336
|
-
newtok();
|
5337
|
-
if(c == '-' || c == '+') {
|
5338
|
-
tokadd(c);
|
5339
|
-
c = nextc();
|
5340
|
-
}
|
5341
|
-
if(c == '0') {
|
5342
|
-
#define no_digits() do {yy_error("numeric literal without digits"); return 0;} while(0)
|
5343
|
-
int start = toklen();
|
5344
|
-
c = nextc();
|
5345
|
-
if(c == 'x' || c == 'X') {
|
5346
|
-
/* hexadecimal */
|
5347
|
-
c = nextc();
|
5348
|
-
if(c != -1 && ISXDIGIT(c)) {
|
5349
|
-
do {
|
5350
|
-
if(c == '_') {
|
5351
|
-
if(nondigit) break;
|
5352
|
-
nondigit = c;
|
5353
|
-
continue;
|
5354
|
-
}
|
5355
|
-
if(!ISXDIGIT(c)) break;
|
5356
|
-
nondigit = 0;
|
5357
|
-
tokadd(c);
|
5358
|
-
} while((c = nextc()) != -1);
|
5359
|
-
}
|
5360
|
-
pushback(c);
|
5361
|
-
tokfix();
|
5362
|
-
if(toklen() == start) {
|
5363
|
-
no_digits();
|
5364
|
-
} else if(nondigit) {
|
5365
|
-
goto trailing_uc;
|
5366
|
-
}
|
5367
|
-
suffix = number_literal_suffix(NUM_SUFFIX_ALL);
|
5368
|
-
return set_integer_literal(rb_cstr_to_inum(tok(), 16, FALSE), suffix);
|
5369
|
-
}
|
5370
|
-
|
5371
|
-
if(c == 'b' || c == 'B') {
|
5372
|
-
/* binary */
|
5373
|
-
c = nextc();
|
5374
|
-
if(c == '0' || c == '1') {
|
5375
|
-
do {
|
5376
|
-
if(c == '_') {
|
5377
|
-
if(nondigit) break;
|
5378
|
-
nondigit = c;
|
5379
|
-
continue;
|
5380
|
-
}
|
5381
|
-
if(c != '0' && c != '1') break;
|
5382
|
-
nondigit = 0;
|
5383
|
-
tokadd(c);
|
5384
|
-
} while((c = nextc()) != -1);
|
5385
|
-
}
|
5386
|
-
pushback(c);
|
5387
|
-
tokfix();
|
5388
|
-
if(toklen() == start) {
|
5389
|
-
no_digits();
|
5390
|
-
} else if(nondigit) {
|
5391
|
-
goto trailing_uc;
|
5392
|
-
}
|
5393
|
-
suffix = number_literal_suffix(NUM_SUFFIX_ALL);
|
5394
|
-
return set_integer_literal(rb_cstr_to_inum(tok(), 2, FALSE), suffix);
|
5395
|
-
}
|
5396
|
-
|
5397
|
-
if(c == 'd' || c == 'D') {
|
5398
|
-
/* decimal */
|
5399
|
-
c = nextc();
|
5400
|
-
if(c != -1 && ISDIGIT(c)) {
|
5401
|
-
do {
|
5402
|
-
if(c == '_') {
|
5403
|
-
if(nondigit) break;
|
5404
|
-
nondigit = c;
|
5405
|
-
continue;
|
5406
|
-
}
|
5407
|
-
if(!ISDIGIT(c)) break;
|
5408
|
-
nondigit = 0;
|
5409
|
-
tokadd(c);
|
5410
|
-
} while((c = nextc()) != -1);
|
5411
|
-
}
|
5412
|
-
pushback(c);
|
5413
|
-
tokfix();
|
5414
|
-
if(toklen() == start) {
|
5415
|
-
no_digits();
|
5416
|
-
} else if(nondigit) {
|
5417
|
-
goto trailing_uc;
|
5418
|
-
}
|
5419
|
-
suffix = number_literal_suffix(NUM_SUFFIX_ALL);
|
5420
|
-
return set_integer_literal(rb_cstr_to_inum(tok(), 10, FALSE), suffix);
|
5421
|
-
}
|
5422
|
-
|
5423
|
-
if(c == '_') {
|
5424
|
-
/* 0_0 */
|
5425
|
-
goto octal_number;
|
5426
|
-
}
|
5427
|
-
|
5428
|
-
if(c == 'o' || c == 'O') {
|
5429
|
-
/* prefixed octal */
|
5430
|
-
c = nextc();
|
5431
|
-
if(c == -1 || c == '_' || !ISDIGIT(c)) {
|
5432
|
-
no_digits();
|
5433
|
-
}
|
5434
|
-
}
|
5435
|
-
|
5436
|
-
if(c >= '0' && c <= '7') {
|
5437
|
-
/* octal */
|
5438
|
-
octal_number:
|
5439
|
-
do {
|
5440
|
-
if(c == '_') {
|
5441
|
-
if(nondigit) break;
|
5442
|
-
nondigit = c;
|
5443
|
-
continue;
|
5444
|
-
}
|
5445
|
-
if(c < '0' || c > '9') break;
|
5446
|
-
if(c > '7') goto invalid_octal;
|
5447
|
-
nondigit = 0;
|
5448
|
-
tokadd(c);
|
5449
|
-
} while((c = nextc()) != -1);
|
5450
|
-
|
5451
|
-
if(toklen() > start) {
|
5452
|
-
pushback(c);
|
5453
|
-
tokfix();
|
5454
|
-
if(nondigit) goto trailing_uc;
|
5455
|
-
suffix = number_literal_suffix(NUM_SUFFIX_ALL);
|
5456
|
-
return set_integer_literal(rb_cstr_to_inum(tok(), 8, FALSE), suffix);
|
5457
|
-
}
|
5458
|
-
if(nondigit) {
|
5459
|
-
pushback(c);
|
5460
|
-
goto trailing_uc;
|
5461
|
-
}
|
5462
|
-
}
|
5463
|
-
|
5464
|
-
if(c > '7' && c <= '9') {
|
5465
|
-
invalid_octal:
|
5466
|
-
yy_error("Invalid octal digit");
|
5467
|
-
} else if(c == '.' || c == 'e' || c == 'E') {
|
5468
|
-
tokadd('0');
|
5469
|
-
} else {
|
5470
|
-
pushback(c);
|
5471
|
-
suffix = number_literal_suffix(NUM_SUFFIX_ALL);
|
5472
|
-
return set_integer_literal(INT2FIX(0), suffix);
|
5473
|
-
}
|
5474
|
-
}
|
5475
|
-
|
5476
|
-
for(;;) {
|
5477
|
-
switch(c) {
|
5478
|
-
case '0': case '1': case '2': case '3': case '4':
|
5479
|
-
case '5': case '6': case '7': case '8': case '9':
|
5480
|
-
nondigit = 0;
|
5481
|
-
tokadd(c);
|
5482
|
-
break;
|
5483
|
-
|
5484
|
-
case '.':
|
5485
|
-
if(nondigit) goto trailing_uc;
|
5486
|
-
if(seen_point || seen_e) {
|
5487
|
-
goto decode_num;
|
5488
|
-
} else {
|
5489
|
-
int c0 = nextc();
|
5490
|
-
if(c0 == -1 || !ISDIGIT(c0)) {
|
5491
|
-
pushback(c0);
|
5492
|
-
goto decode_num;
|
5493
|
-
}
|
5494
|
-
c = c0;
|
5495
|
-
}
|
5496
|
-
seen_point = toklen();
|
5497
|
-
tokadd('.');
|
5498
|
-
tokadd(c);
|
5499
|
-
is_float++;
|
5500
|
-
nondigit = 0;
|
5501
|
-
break;
|
5502
|
-
|
5503
|
-
case 'e':
|
5504
|
-
case 'E':
|
5505
|
-
if(nondigit) {
|
5506
|
-
pushback(c);
|
5507
|
-
c = nondigit;
|
5508
|
-
goto decode_num;
|
5509
|
-
}
|
5510
|
-
if(seen_e) {
|
5511
|
-
goto decode_num;
|
5512
|
-
}
|
5513
|
-
nondigit = c;
|
5514
|
-
c = nextc();
|
5515
|
-
if(c != '-' && c != '+' && !ISDIGIT(c)) {
|
5516
|
-
pushback(c);
|
5517
|
-
nondigit = 0;
|
5518
|
-
goto decode_num;
|
5519
|
-
}
|
5520
|
-
tokadd(nondigit);
|
5521
|
-
seen_e++;
|
5522
|
-
is_float++;
|
5523
|
-
tokadd(c);
|
5524
|
-
nondigit = (c == '-' || c == '+') ? c : 0;
|
5525
|
-
break;
|
5526
|
-
|
5527
|
-
case '_': /* `_' in number just ignored */
|
5528
|
-
if(nondigit) goto decode_num;
|
5529
|
-
nondigit = c;
|
5530
|
-
break;
|
5531
|
-
|
5532
|
-
default:
|
5533
|
-
goto decode_num;
|
6179
|
+
case '+':
|
6180
|
+
c = nextc();
|
6181
|
+
if(IS_AFTER_OPERATOR()) {
|
6182
|
+
SET_LEX_STATE(EXPR_ARG);
|
6183
|
+
if(c == '@') {
|
6184
|
+
return tUPLUS;
|
5534
6185
|
}
|
5535
|
-
c
|
6186
|
+
pushback(c);
|
6187
|
+
return '+';
|
6188
|
+
}
|
6189
|
+
if(c == '=') {
|
6190
|
+
set_yylval_id('+');
|
6191
|
+
SET_LEX_STATE(EXPR_BEG);
|
6192
|
+
return tOP_ASGN;
|
6193
|
+
}
|
6194
|
+
if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous('+'))) {
|
6195
|
+
SET_LEX_STATE(EXPR_BEG);
|
6196
|
+
pushback(c);
|
6197
|
+
if(c != -1 && ISDIGIT(c)) {
|
6198
|
+
return parse_numeric(parser_state, '+');
|
6199
|
+
}
|
6200
|
+
return tUPLUS;
|
5536
6201
|
}
|
6202
|
+
SET_LEX_STATE(EXPR_BEG);
|
6203
|
+
pushback(c);
|
6204
|
+
warn_balanced("+", "unary operator");
|
6205
|
+
return '+';
|
5537
6206
|
|
5538
|
-
|
6207
|
+
case '-':
|
6208
|
+
c = nextc();
|
6209
|
+
if(IS_AFTER_OPERATOR()) {
|
6210
|
+
SET_LEX_STATE(EXPR_ARG);
|
6211
|
+
if(c == '@') {
|
6212
|
+
return tUMINUS;
|
6213
|
+
}
|
6214
|
+
pushback(c);
|
6215
|
+
return '-';
|
6216
|
+
}
|
6217
|
+
if(c == '=') {
|
6218
|
+
set_yylval_id('-');
|
6219
|
+
SET_LEX_STATE(EXPR_BEG);
|
6220
|
+
return tOP_ASGN;
|
6221
|
+
}
|
6222
|
+
if(c == '>') {
|
6223
|
+
SET_LEX_STATE(EXPR_ENDFN);
|
6224
|
+
return tLAMBDA;
|
6225
|
+
}
|
6226
|
+
if(IS_BEG() || (IS_SPCARG(c) && arg_ambiguous('-'))) {
|
6227
|
+
SET_LEX_STATE(EXPR_BEG);
|
6228
|
+
pushback(c);
|
6229
|
+
if(c != -1 && ISDIGIT(c)) {
|
6230
|
+
return tUMINUS_NUM;
|
6231
|
+
}
|
6232
|
+
return tUMINUS;
|
6233
|
+
}
|
6234
|
+
SET_LEX_STATE(EXPR_BEG);
|
5539
6235
|
pushback(c);
|
5540
|
-
|
5541
|
-
|
5542
|
-
|
5543
|
-
|
5544
|
-
|
5545
|
-
|
5546
|
-
|
5547
|
-
|
5548
|
-
int type = tFLOAT;
|
5549
|
-
VALUE v;
|
5550
|
-
|
5551
|
-
suffix = number_literal_suffix(seen_e ? NUM_SUFFIX_I : NUM_SUFFIX_ALL);
|
5552
|
-
if(suffix & NUM_SUFFIX_R) {
|
5553
|
-
char *point = &tok()[seen_point];
|
5554
|
-
size_t fraclen = toklen()-seen_point-1;
|
5555
|
-
type = tRATIONAL;
|
5556
|
-
memmove(point, point+1, fraclen+1);
|
5557
|
-
v = rb_cstr_to_inum(tok(), 10, FALSE);
|
5558
|
-
v = rb_rational_new(v,
|
5559
|
-
rb_funcall(INT2FIX(10), rb_intern("**"), 1, INT2NUM(fraclen)));
|
5560
|
-
} else {
|
5561
|
-
double d = strtod(tok(), 0);
|
5562
|
-
if(errno == ERANGE) {
|
5563
|
-
rb_warningS("Float %s out of range", tok());
|
5564
|
-
errno = 0;
|
5565
|
-
}
|
5566
|
-
v = rb_float_new(d);
|
6236
|
+
warn_balanced("-", "unary operator");
|
6237
|
+
return '-';
|
6238
|
+
|
6239
|
+
case '.':
|
6240
|
+
SET_LEX_STATE(EXPR_BEG);
|
6241
|
+
if((c = nextc()) == '.') {
|
6242
|
+
if((c = nextc()) == '.') {
|
6243
|
+
return tDOT3;
|
5567
6244
|
}
|
5568
|
-
|
6245
|
+
pushback(c);
|
6246
|
+
return tDOT2;
|
5569
6247
|
}
|
5570
|
-
|
5571
|
-
|
5572
|
-
|
6248
|
+
pushback(c);
|
6249
|
+
if(c != -1 && ISDIGIT(c)) {
|
6250
|
+
yy_error("no .<digit> floating literal anymore; put 0 before dot");
|
6251
|
+
}
|
6252
|
+
SET_LEX_STATE(EXPR_DOT);
|
6253
|
+
return '.';
|
6254
|
+
|
6255
|
+
case '0': case '1': case '2': case '3': case '4':
|
6256
|
+
case '5': case '6': case '7': case '8': case '9':
|
6257
|
+
return parse_numeric(parser_state, c);
|
5573
6258
|
|
5574
6259
|
case ')':
|
5575
6260
|
case ']':
|
@@ -5578,12 +6263,12 @@ retry:
|
|
5578
6263
|
COND_LEXPOP();
|
5579
6264
|
CMDARG_LEXPOP();
|
5580
6265
|
if(c == ')') {
|
5581
|
-
|
6266
|
+
SET_LEX_STATE(EXPR_ENDFN);
|
5582
6267
|
} else {
|
5583
|
-
|
6268
|
+
SET_LEX_STATE(EXPR_ENDARG);
|
5584
6269
|
}
|
5585
6270
|
if(c == '}') {
|
5586
|
-
if
|
6271
|
+
if(!brace_nest--) c = tSTRING_DEND;
|
5587
6272
|
}
|
5588
6273
|
return c;
|
5589
6274
|
|
@@ -5591,16 +6276,16 @@ retry:
|
|
5591
6276
|
c = nextc();
|
5592
6277
|
if(c == ':') {
|
5593
6278
|
if(IS_BEG() || lex_state_p(EXPR_CLASS) || IS_SPCARG(-1)) {
|
5594
|
-
|
6279
|
+
SET_LEX_STATE(EXPR_BEG);
|
5595
6280
|
return tCOLON3;
|
5596
6281
|
}
|
5597
|
-
|
6282
|
+
SET_LEX_STATE(EXPR_DOT);
|
5598
6283
|
return tCOLON2;
|
5599
6284
|
}
|
5600
|
-
if(IS_END() || ISSPACE(c)) {
|
6285
|
+
if(IS_END() || ISSPACE(c) || c == '#') {
|
5601
6286
|
pushback(c);
|
5602
6287
|
warn_balanced(":", "symbol literal");
|
5603
|
-
|
6288
|
+
SET_LEX_STATE(EXPR_BEG);
|
5604
6289
|
return ':';
|
5605
6290
|
}
|
5606
6291
|
switch(c) {
|
@@ -5614,46 +6299,46 @@ retry:
|
|
5614
6299
|
pushback(c);
|
5615
6300
|
break;
|
5616
6301
|
}
|
5617
|
-
|
6302
|
+
SET_LEX_STATE(EXPR_FNAME);
|
5618
6303
|
return tSYMBEG;
|
5619
6304
|
|
5620
6305
|
case '/':
|
5621
|
-
if(
|
6306
|
+
if(IS_BEG()) {
|
5622
6307
|
lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
|
5623
6308
|
return tREGEXP_BEG;
|
5624
6309
|
}
|
5625
6310
|
if((c = nextc()) == '=') {
|
5626
6311
|
set_yylval_id('/');
|
5627
|
-
|
6312
|
+
SET_LEX_STATE(EXPR_BEG);
|
5628
6313
|
return tOP_ASGN;
|
5629
6314
|
}
|
5630
6315
|
pushback(c);
|
5631
6316
|
if(IS_SPCARG(c)) {
|
5632
|
-
(void)arg_ambiguous();
|
6317
|
+
(void)arg_ambiguous('/');
|
5633
6318
|
lex_strterm = NEW_STRTERM(str_regexp, '/', 0);
|
5634
6319
|
return tREGEXP_BEG;
|
5635
6320
|
}
|
5636
|
-
|
6321
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
5637
6322
|
warn_balanced("/", "regexp literal");
|
5638
6323
|
return '/';
|
5639
6324
|
|
5640
6325
|
case '^':
|
5641
6326
|
if((c = nextc()) == '=') {
|
5642
6327
|
set_yylval_id('^');
|
5643
|
-
|
6328
|
+
SET_LEX_STATE(EXPR_BEG);
|
5644
6329
|
return tOP_ASGN;
|
5645
6330
|
}
|
5646
|
-
|
6331
|
+
SET_LEX_STATE(IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG);
|
5647
6332
|
pushback(c);
|
5648
6333
|
return '^';
|
5649
6334
|
|
5650
6335
|
case ';':
|
5651
|
-
|
6336
|
+
SET_LEX_STATE(EXPR_BEG);
|
5652
6337
|
command_start = TRUE;
|
5653
6338
|
return ';';
|
5654
6339
|
|
5655
6340
|
case ',':
|
5656
|
-
|
6341
|
+
SET_LEX_STATE(EXPR_BEG | EXPR_LABEL);
|
5657
6342
|
return ',';
|
5658
6343
|
|
5659
6344
|
case '~':
|
@@ -5661,9 +6346,9 @@ retry:
|
|
5661
6346
|
if((c = nextc()) != '@') {
|
5662
6347
|
pushback(c);
|
5663
6348
|
}
|
5664
|
-
|
6349
|
+
SET_LEX_STATE(EXPR_ARG);
|
5665
6350
|
} else {
|
5666
|
-
|
6351
|
+
SET_LEX_STATE(EXPR_BEG);
|
5667
6352
|
}
|
5668
6353
|
return '~';
|
5669
6354
|
|
@@ -5676,13 +6361,13 @@ retry:
|
|
5676
6361
|
paren_nest++;
|
5677
6362
|
COND_PUSH(0);
|
5678
6363
|
CMDARG_PUSH(0);
|
5679
|
-
|
6364
|
+
SET_LEX_STATE(EXPR_BEG | EXPR_LABEL);
|
5680
6365
|
return c;
|
5681
6366
|
|
5682
6367
|
case '[':
|
5683
6368
|
paren_nest++;
|
5684
6369
|
if(IS_AFTER_OPERATOR()) {
|
5685
|
-
|
6370
|
+
SET_LEX_STATE(EXPR_ARG);
|
5686
6371
|
if((c = nextc()) == ']') {
|
5687
6372
|
if((c = nextc()) == '=') {
|
5688
6373
|
return tASET;
|
@@ -5691,13 +6376,14 @@ retry:
|
|
5691
6376
|
return tAREF;
|
5692
6377
|
}
|
5693
6378
|
pushback(c);
|
6379
|
+
SET_LEX_STATE(lex_state | EXPR_LABEL);
|
5694
6380
|
return '[';
|
5695
6381
|
} else if(IS_BEG()) {
|
5696
6382
|
c = tLBRACK;
|
5697
|
-
} else if(IS_ARG() && space_seen) {
|
6383
|
+
} else if(IS_ARG() && (space_seen || lex_state_p(EXPR_LABELED))) {
|
5698
6384
|
c = tLBRACK;
|
5699
6385
|
}
|
5700
|
-
|
6386
|
+
SET_LEX_STATE(EXPR_BEG|EXPR_LABEL);
|
5701
6387
|
COND_PUSH(0);
|
5702
6388
|
CMDARG_PUSH(0);
|
5703
6389
|
return c;
|
@@ -5705,14 +6391,16 @@ retry:
|
|
5705
6391
|
case '{':
|
5706
6392
|
++brace_nest;
|
5707
6393
|
if(lpar_beg && lpar_beg == paren_nest) {
|
5708
|
-
|
6394
|
+
SET_LEX_STATE(EXPR_BEG);
|
5709
6395
|
lpar_beg = 0;
|
5710
6396
|
--paren_nest;
|
5711
6397
|
COND_PUSH(0);
|
5712
6398
|
CMDARG_PUSH(0);
|
5713
6399
|
return tLAMBEG;
|
5714
6400
|
}
|
5715
|
-
if(
|
6401
|
+
if(lex_state_p(EXPR_LABELED)) {
|
6402
|
+
c = tLBRACE; /* hash */
|
6403
|
+
} else if(lex_state_p(EXPR_ARG_ANY | EXPR_END | EXPR_ENDFN)) {
|
5716
6404
|
c = '{'; /* block (primary) */
|
5717
6405
|
} else if(lex_state_p(EXPR_ENDARG)) {
|
5718
6406
|
c = tLBRACE_ARG; /* block (expr) */
|
@@ -5721,7 +6409,8 @@ retry:
|
|
5721
6409
|
}
|
5722
6410
|
COND_PUSH(0);
|
5723
6411
|
CMDARG_PUSH(0);
|
5724
|
-
|
6412
|
+
SET_LEX_STATE(EXPR_BEG);
|
6413
|
+
if(c != tLBRACE_ARG) SET_LEX_STATE(lex_state | EXPR_LABEL);
|
5725
6414
|
if(c != tLBRACE) command_start = TRUE;
|
5726
6415
|
return c;
|
5727
6416
|
|
@@ -5735,206 +6424,13 @@ retry:
|
|
5735
6424
|
return '\\';
|
5736
6425
|
|
5737
6426
|
case '%':
|
5738
|
-
|
5739
|
-
intptr_t term;
|
5740
|
-
intptr_t paren;
|
5741
|
-
|
5742
|
-
c = nextc();
|
5743
|
-
quotation:
|
5744
|
-
if(c == -1 || !ISALNUM(c)) {
|
5745
|
-
term = c;
|
5746
|
-
c = 'Q';
|
5747
|
-
} else {
|
5748
|
-
term = nextc();
|
5749
|
-
if(parser_enc_isalnum((int)term, parser_state->enc) || !parser_isascii()) {
|
5750
|
-
yy_error("unknown type of % string");
|
5751
|
-
return 0;
|
5752
|
-
}
|
5753
|
-
}
|
5754
|
-
if(c == -1 || term == -1) {
|
5755
|
-
rb_compile_error(parser_state, "unterminated quoted string meets end of file");
|
5756
|
-
return 0;
|
5757
|
-
}
|
5758
|
-
paren = term;
|
5759
|
-
if(term == '(') term = ')';
|
5760
|
-
else if(term == '[') term = ']';
|
5761
|
-
else if(term == '{') term = '}';
|
5762
|
-
else if(term == '<') term = '>';
|
5763
|
-
else paren = 0;
|
5764
|
-
|
5765
|
-
switch(c) {
|
5766
|
-
case 'Q':
|
5767
|
-
lex_strterm = NEW_STRTERM(str_dquote, term, paren);
|
5768
|
-
return tSTRING_BEG;
|
5769
|
-
|
5770
|
-
case 'q':
|
5771
|
-
lex_strterm = NEW_STRTERM(str_squote, term, paren);
|
5772
|
-
return tSTRING_BEG;
|
5773
|
-
|
5774
|
-
case 'W':
|
5775
|
-
lex_strterm = NEW_STRTERM(str_dword, term, paren);
|
5776
|
-
do {c = nextc();} while(ISSPACE(c));
|
5777
|
-
pushback(c);
|
5778
|
-
return tWORDS_BEG;
|
5779
|
-
|
5780
|
-
case 'w':
|
5781
|
-
lex_strterm = NEW_STRTERM(str_sword, term, paren);
|
5782
|
-
do {c = nextc();} while(ISSPACE(c));
|
5783
|
-
pushback(c);
|
5784
|
-
return tQWORDS_BEG;
|
5785
|
-
|
5786
|
-
case 'I':
|
5787
|
-
lex_strterm = NEW_STRTERM(str_dword, term, paren);
|
5788
|
-
do {c = nextc();} while (ISSPACE(c));
|
5789
|
-
pushback(c);
|
5790
|
-
return tSYMBOLS_BEG;
|
5791
|
-
|
5792
|
-
case 'i':
|
5793
|
-
lex_strterm = NEW_STRTERM(str_sword, term, paren);
|
5794
|
-
do {c = nextc();} while (ISSPACE(c));
|
5795
|
-
pushback(c);
|
5796
|
-
return tQSYMBOLS_BEG;
|
5797
|
-
|
5798
|
-
case 'x':
|
5799
|
-
lex_strterm = NEW_STRTERM(str_xquote, term, paren);
|
5800
|
-
return tXSTRING_BEG;
|
5801
|
-
|
5802
|
-
case 'r':
|
5803
|
-
lex_strterm = NEW_STRTERM(str_regexp, term, paren);
|
5804
|
-
return tREGEXP_BEG;
|
5805
|
-
|
5806
|
-
case 's':
|
5807
|
-
lex_strterm = NEW_STRTERM(str_ssym, term, paren);
|
5808
|
-
lex_state = EXPR_FNAME;
|
5809
|
-
return tSYMBEG;
|
5810
|
-
|
5811
|
-
default:
|
5812
|
-
yy_error("unknown type of % string");
|
5813
|
-
return 0;
|
5814
|
-
}
|
5815
|
-
}
|
5816
|
-
if((c = nextc()) == '=') {
|
5817
|
-
set_yylval_id('%');
|
5818
|
-
lex_state = EXPR_BEG;
|
5819
|
-
return tOP_ASGN;
|
5820
|
-
}
|
5821
|
-
if(IS_SPCARG(c)) {
|
5822
|
-
goto quotation;
|
5823
|
-
}
|
5824
|
-
lex_state = IS_AFTER_OPERATOR() ? EXPR_ARG : EXPR_BEG;
|
5825
|
-
pushback(c);
|
5826
|
-
warn_balanced("%%", "string literal");
|
5827
|
-
return '%';
|
6427
|
+
return parse_percent(parser_state, space_seen, last_state);
|
5828
6428
|
|
5829
6429
|
case '$':
|
5830
|
-
|
5831
|
-
newtok();
|
5832
|
-
c = nextc();
|
5833
|
-
switch(c) {
|
5834
|
-
case '_': /* $_: last read line string */
|
5835
|
-
c = nextc();
|
5836
|
-
if(parser_is_identchar()) {
|
5837
|
-
tokadd('$');
|
5838
|
-
tokadd('_');
|
5839
|
-
break;
|
5840
|
-
}
|
5841
|
-
pushback(c);
|
5842
|
-
c = '_';
|
5843
|
-
/* fall through */
|
5844
|
-
case '~': /* $~: match-data */
|
5845
|
-
case '*': /* $*: argv */
|
5846
|
-
case '$': /* $$: pid */
|
5847
|
-
case '?': /* $?: last status */
|
5848
|
-
case '!': /* $!: error string */
|
5849
|
-
case '@': /* $@: error position */
|
5850
|
-
case '/': /* $/: input record separator */
|
5851
|
-
case '\\': /* $\: output record separator */
|
5852
|
-
case ';': /* $;: field separator */
|
5853
|
-
case ',': /* $,: output field separator */
|
5854
|
-
case '.': /* $.: last read line number */
|
5855
|
-
case '=': /* $=: ignorecase */
|
5856
|
-
case ':': /* $:: load path */
|
5857
|
-
case '<': /* $<: reading filename */
|
5858
|
-
case '>': /* $>: default output handle */
|
5859
|
-
case '\"': /* $": already loaded files */
|
5860
|
-
tokadd('$');
|
5861
|
-
tokadd(c);
|
5862
|
-
goto gvar;
|
5863
|
-
|
5864
|
-
case '-':
|
5865
|
-
tokadd('$');
|
5866
|
-
tokadd(c);
|
5867
|
-
c = nextc();
|
5868
|
-
if(parser_is_identchar()) {
|
5869
|
-
if(tokadd_mbchar(c) == -1) return 0;
|
5870
|
-
} else {
|
5871
|
-
pushback(c);
|
5872
|
-
pushback('-');
|
5873
|
-
return '$';
|
5874
|
-
}
|
5875
|
-
gvar:
|
5876
|
-
tokfix();
|
5877
|
-
// TODO rb_intern3(tok(), tokidx, current_enc);
|
5878
|
-
set_yylval_name(parser_intern(tok()));
|
5879
|
-
return tGVAR;
|
5880
|
-
|
5881
|
-
case '&': /* $&: last match */
|
5882
|
-
case '`': /* $`: string before last match */
|
5883
|
-
case '\'': /* $': string after last match */
|
5884
|
-
case '+': /* $+: string matches last paren. */
|
5885
|
-
if(lex_state_of_p(last_state, EXPR_FNAME)) {
|
5886
|
-
tokadd('$');
|
5887
|
-
tokadd(c);
|
5888
|
-
goto gvar;
|
5889
|
-
}
|
5890
|
-
set_yylval_node(NEW_BACK_REF(c));
|
5891
|
-
return tBACK_REF;
|
5892
|
-
|
5893
|
-
case '1': case '2': case '3':
|
5894
|
-
case '4': case '5': case '6':
|
5895
|
-
case '7': case '8': case '9':
|
5896
|
-
tokadd('$');
|
5897
|
-
do {
|
5898
|
-
tokadd(c);
|
5899
|
-
c = nextc();
|
5900
|
-
} while(c != -1 && ISDIGIT(c));
|
5901
|
-
pushback(c);
|
5902
|
-
if(lex_state_of_p(last_state, EXPR_FNAME)) goto gvar;
|
5903
|
-
tokfix();
|
5904
|
-
set_yylval_node(NEW_NTH_REF(atoi(tok()+1)));
|
5905
|
-
return tNTH_REF;
|
5906
|
-
|
5907
|
-
default:
|
5908
|
-
if(!parser_is_identchar()) {
|
5909
|
-
pushback(c);
|
5910
|
-
rb_compile_error(parser_state,
|
5911
|
-
"`$%c' is not allowed as a global variable name", c);
|
5912
|
-
return 0;
|
5913
|
-
}
|
5914
|
-
case '0':
|
5915
|
-
tokadd('$');
|
5916
|
-
}
|
5917
|
-
break;
|
6430
|
+
return parse_gvar(parser_state, last_state);
|
5918
6431
|
|
5919
6432
|
case '@':
|
5920
|
-
|
5921
|
-
newtok();
|
5922
|
-
tokadd('@');
|
5923
|
-
if(c == '@') {
|
5924
|
-
tokadd('@');
|
5925
|
-
c = nextc();
|
5926
|
-
}
|
5927
|
-
if(c != -1 && (ISDIGIT(c) || !parser_is_identchar())) {
|
5928
|
-
if(tokidx == 1) {
|
5929
|
-
rb_compile_error(parser_state,
|
5930
|
-
"`@%c' is not allowed as an instance variable name", c);
|
5931
|
-
} else {
|
5932
|
-
rb_compile_error(parser_state,
|
5933
|
-
"`@@%c' is not allowed as a class variable name", c);
|
5934
|
-
}
|
5935
|
-
return 0;
|
5936
|
-
}
|
5937
|
-
break;
|
6433
|
+
return parse_atmark(parser_state, last_state);
|
5938
6434
|
|
5939
6435
|
case '_':
|
5940
6436
|
if(was_bol() && whole_match_p("__END__", 7, 0)) {
|
@@ -5955,132 +6451,7 @@ retry:
|
|
5955
6451
|
break;
|
5956
6452
|
}
|
5957
6453
|
|
5958
|
-
|
5959
|
-
do {
|
5960
|
-
if(!ISASCII(c)) mb = ENC_CODERANGE_UNKNOWN;
|
5961
|
-
if(tokadd_mbchar(c) == -1) return 0;
|
5962
|
-
c = nextc();
|
5963
|
-
} while(parser_is_identchar());
|
5964
|
-
switch(tok()[0]) {
|
5965
|
-
case '@': case '$':
|
5966
|
-
pushback(c);
|
5967
|
-
break;
|
5968
|
-
default:
|
5969
|
-
if((c == '!' || c == '?') && !peek('=')) {
|
5970
|
-
tokadd(c);
|
5971
|
-
} else {
|
5972
|
-
pushback(c);
|
5973
|
-
}
|
5974
|
-
}
|
5975
|
-
tokfix();
|
5976
|
-
{
|
5977
|
-
int result = 0;
|
5978
|
-
|
5979
|
-
last_state = lex_state;
|
5980
|
-
switch(tok()[0]) {
|
5981
|
-
case '$':
|
5982
|
-
lex_state = EXPR_END;
|
5983
|
-
result = tGVAR;
|
5984
|
-
break;
|
5985
|
-
case '@':
|
5986
|
-
lex_state = EXPR_END;
|
5987
|
-
if(tok()[1] == '@') {
|
5988
|
-
result = tCVAR;
|
5989
|
-
} else {
|
5990
|
-
result = tIVAR;
|
5991
|
-
}
|
5992
|
-
break;
|
5993
|
-
default:
|
5994
|
-
if(toklast() == '!' || toklast() == '?') {
|
5995
|
-
result = tFID;
|
5996
|
-
} else {
|
5997
|
-
if(lex_state_p(EXPR_FNAME)) {
|
5998
|
-
if((c = nextc()) == '=' && !peek('~') && !peek('>') &&
|
5999
|
-
(!peek('=') || (peek_n('>', 1)))) {
|
6000
|
-
result = tIDENTIFIER;
|
6001
|
-
tokadd(c);
|
6002
|
-
tokfix();
|
6003
|
-
} else {
|
6004
|
-
pushback(c);
|
6005
|
-
}
|
6006
|
-
}
|
6007
|
-
if(result == 0 && ISUPPER(tok()[0])) {
|
6008
|
-
result = tCONSTANT;
|
6009
|
-
} else {
|
6010
|
-
result = tIDENTIFIER;
|
6011
|
-
}
|
6012
|
-
}
|
6013
|
-
|
6014
|
-
if(IS_LABEL_POSSIBLE()) {
|
6015
|
-
if(IS_LABEL_SUFFIX(0)) {
|
6016
|
-
lex_state = EXPR_BEG;
|
6017
|
-
nextc();
|
6018
|
-
set_yylval_name(TOK_INTERN(!ENC_SINGLE(mb)));
|
6019
|
-
return tLABEL;
|
6020
|
-
}
|
6021
|
-
}
|
6022
|
-
if(mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {
|
6023
|
-
const struct kwtable *kw;
|
6024
|
-
|
6025
|
-
/* See if it is a reserved word. */
|
6026
|
-
kw = reserved_word(tok(), toklen());
|
6027
|
-
if(kw) {
|
6028
|
-
enum lex_state_e state = lex_state;
|
6029
|
-
lex_state = kw->state;
|
6030
|
-
if(lex_state_of_p(state, EXPR_FNAME)) {
|
6031
|
-
set_yylval_name(parser_intern(kw->name));
|
6032
|
-
return kw->id[0];
|
6033
|
-
}
|
6034
|
-
if(lex_state_p(EXPR_BEG)) {
|
6035
|
-
command_start = TRUE;
|
6036
|
-
}
|
6037
|
-
if(kw->id[0] == keyword_do) {
|
6038
|
-
if(lpar_beg && lpar_beg == paren_nest) {
|
6039
|
-
lpar_beg = 0;
|
6040
|
-
--paren_nest;
|
6041
|
-
return keyword_do_LAMBDA;
|
6042
|
-
}
|
6043
|
-
if(COND_P()) return keyword_do_cond;
|
6044
|
-
if(CMDARG_P() && !lex_state_of_p(state, EXPR_CMDARG))
|
6045
|
-
return keyword_do_block;
|
6046
|
-
if(lex_state_of_p(state, EXPR_BEG | EXPR_ENDARG))
|
6047
|
-
return keyword_do_block;
|
6048
|
-
return keyword_do;
|
6049
|
-
}
|
6050
|
-
if(lex_state_of_p(state, EXPR_BEG | EXPR_VALUE))
|
6051
|
-
return kw->id[0];
|
6052
|
-
else {
|
6053
|
-
if(kw->id[0] != kw->id[1])
|
6054
|
-
lex_state = EXPR_BEG;
|
6055
|
-
return kw->id[1];
|
6056
|
-
}
|
6057
|
-
}
|
6058
|
-
}
|
6059
|
-
|
6060
|
-
if(lex_state_p(EXPR_BEG_ANY | EXPR_ARG_ANY | EXPR_DOT)) {
|
6061
|
-
if(cmd_state) {
|
6062
|
-
lex_state = EXPR_CMDARG;
|
6063
|
-
} else {
|
6064
|
-
lex_state = EXPR_ARG;
|
6065
|
-
}
|
6066
|
-
} else if(lex_state == EXPR_FNAME) {
|
6067
|
-
lex_state = EXPR_ENDFN;
|
6068
|
-
} else {
|
6069
|
-
lex_state = EXPR_END;
|
6070
|
-
}
|
6071
|
-
}
|
6072
|
-
{
|
6073
|
-
ID ident = TOK_INTERN(!ENC_SINGLE(mb));
|
6074
|
-
|
6075
|
-
set_yylval_name(ident);
|
6076
|
-
if(!lex_state_of_p(last_state, EXPR_DOT | EXPR_FNAME) &&
|
6077
|
-
is_local_id(ident) && lvar_defined(ident)) {
|
6078
|
-
lex_state = EXPR_END;
|
6079
|
-
}
|
6080
|
-
}
|
6081
|
-
|
6082
|
-
return result;
|
6083
|
-
}
|
6454
|
+
return parse_ident(parser_state, c, cmd_state);
|
6084
6455
|
}
|
6085
6456
|
|
6086
6457
|
#if YYPURE
|
@@ -6378,7 +6749,7 @@ parser_literal_concat(rb_parser_state* parser_state, NODE *head, NODE *tail)
|
|
6378
6749
|
&& (headlast = head->nd_next->nd_end->nd_head)
|
6379
6750
|
&& nd_type(headlast) == NODE_STR) {
|
6380
6751
|
lit = headlast->nd_lit;
|
6381
|
-
if
|
6752
|
+
if(!literal_concat0(lit, tail->nd_lit))
|
6382
6753
|
goto error;
|
6383
6754
|
tail->nd_lit = Qnil;
|
6384
6755
|
goto append;
|
@@ -6442,25 +6813,25 @@ static const struct {
|
|
6442
6813
|
ID token;
|
6443
6814
|
const char *name;
|
6444
6815
|
} op_tbl[] = {
|
6445
|
-
{tDOT2,
|
6446
|
-
{tDOT3,
|
6447
|
-
{tPOW,
|
6816
|
+
{tDOT2, ".."},
|
6817
|
+
{tDOT3, "..."},
|
6818
|
+
{tPOW, "**"},
|
6448
6819
|
{tDSTAR, "**"},
|
6449
|
-
{tUPLUS,
|
6450
|
-
{tUMINUS,
|
6451
|
-
{tCMP,
|
6452
|
-
{tGEQ,
|
6453
|
-
{tLEQ,
|
6454
|
-
{tEQ,
|
6455
|
-
{tEQQ,
|
6456
|
-
{tNEQ,
|
6457
|
-
{tMATCH,
|
6458
|
-
{tNMATCH,
|
6459
|
-
{tAREF,
|
6460
|
-
{tASET,
|
6461
|
-
{tLSHFT,
|
6462
|
-
{tRSHFT,
|
6463
|
-
{tCOLON2,
|
6820
|
+
{tUPLUS, "+@"},
|
6821
|
+
{tUMINUS, "-@"},
|
6822
|
+
{tCMP, "<=>"},
|
6823
|
+
{tGEQ, ">="},
|
6824
|
+
{tLEQ, "<="},
|
6825
|
+
{tEQ, "=="},
|
6826
|
+
{tEQQ, "==="},
|
6827
|
+
{tNEQ, "!="},
|
6828
|
+
{tMATCH, "=~"},
|
6829
|
+
{tNMATCH, "!~"},
|
6830
|
+
{tAREF, "[]"},
|
6831
|
+
{tASET, "[]="},
|
6832
|
+
{tLSHFT, "<<"},
|
6833
|
+
{tRSHFT, ">>"},
|
6834
|
+
{tCOLON2, "::"},
|
6464
6835
|
|
6465
6836
|
// Added for Rubinius
|
6466
6837
|
{'!', "!"},
|
@@ -6715,7 +7086,7 @@ parser_block_dup_check(rb_parser_state* parser_state, NODE *node1, NODE *node2)
|
|
6715
7086
|
static const char id_type_names[][9] = {
|
6716
7087
|
"LOCAL",
|
6717
7088
|
"INSTANCE",
|
6718
|
-
"",
|
7089
|
+
"", /* INSTANCE2 */
|
6719
7090
|
"GLOBAL",
|
6720
7091
|
"ATTRSET",
|
6721
7092
|
"CONST",
|
@@ -6727,10 +7098,10 @@ static ID
|
|
6727
7098
|
rb_id_attrset(ID id)
|
6728
7099
|
{
|
6729
7100
|
if(!is_notop_id(id)) {
|
6730
|
-
switch
|
7101
|
+
switch(id) {
|
6731
7102
|
case tAREF:
|
6732
7103
|
case tASET:
|
6733
|
-
return tASET;
|
7104
|
+
return tASET; /* only exception */
|
6734
7105
|
}
|
6735
7106
|
rb_name_error(id, "cannot make operator ID :%s attrset", rb_id2name(id));
|
6736
7107
|
} else {
|
@@ -6756,12 +7127,17 @@ rb_id_attrset(ID id)
|
|
6756
7127
|
}
|
6757
7128
|
|
6758
7129
|
static NODE *
|
6759
|
-
parser_attrset(rb_parser_state* parser_state, NODE *recv, ID id)
|
7130
|
+
parser_attrset(rb_parser_state* parser_state, NODE *recv, ID atype, ID id)
|
6760
7131
|
{
|
6761
7132
|
if(recv && nd_type(recv) == NODE_SELF) {
|
6762
7133
|
recv = (NODE *)1;
|
6763
7134
|
}
|
6764
|
-
|
7135
|
+
|
7136
|
+
if(CALL_Q_P(atype)) {
|
7137
|
+
return NEW_ANDATTRASGN(recv, rb_id_attrset(id), 0);
|
7138
|
+
} else {
|
7139
|
+
return NEW_ATTRASGN(recv, rb_id_attrset(id), 0);
|
7140
|
+
}
|
6765
7141
|
}
|
6766
7142
|
|
6767
7143
|
static void
|
@@ -6827,6 +7203,7 @@ parser_node_assign(rb_parser_state* parser_state, NODE *lhs, NODE *rhs)
|
|
6827
7203
|
lhs->nd_value = rhs;
|
6828
7204
|
break;
|
6829
7205
|
|
7206
|
+
case NODE_ANDATTRASGN:
|
6830
7207
|
case NODE_ATTRASGN:
|
6831
7208
|
case NODE_CALL:
|
6832
7209
|
lhs->nd_args = arg_append(lhs->nd_args, rhs);
|
@@ -6869,8 +7246,8 @@ parser_new_op_assign(rb_parser_state* parser_state, NODE *lhs, ID op, NODE *rhs)
|
|
6869
7246
|
}
|
6870
7247
|
|
6871
7248
|
static NODE*
|
6872
|
-
parser_new_attr_op_assign(rb_parser_state* parser_state,
|
6873
|
-
|
7249
|
+
parser_new_attr_op_assign(rb_parser_state* parser_state, NODE *lhs,
|
7250
|
+
ID atype, ID attr, ID op, NODE *rhs)
|
6874
7251
|
{
|
6875
7252
|
NODE *asgn;
|
6876
7253
|
|
@@ -6881,7 +7258,7 @@ parser_new_attr_op_assign(rb_parser_state* parser_state,
|
|
6881
7258
|
} else {
|
6882
7259
|
op = convert_op(op);
|
6883
7260
|
}
|
6884
|
-
asgn = NEW_OP_ASGN2(lhs, attr, op, rhs);
|
7261
|
+
asgn = NEW_OP_ASGN2(lhs, CALL_Q_P(atype), attr, op, rhs);
|
6885
7262
|
fixpos(asgn, lhs);
|
6886
7263
|
|
6887
7264
|
return asgn;
|
@@ -7596,7 +7973,7 @@ scan_hex(const char *start, size_t len, size_t *retlen)
|
|
7596
7973
|
}
|
7597
7974
|
|
7598
7975
|
static ID
|
7599
|
-
parser_internal_id(rb_parser_state
|
7976
|
+
parser_internal_id(rb_parser_state* parser_state)
|
7600
7977
|
{
|
7601
7978
|
ID id = (ID)vtable_size(locals_table->args) + (ID)vtable_size(locals_table->vars);
|
7602
7979
|
id += ((tLAST_TOKEN - ID_INTERNAL) >> ID_SCOPE_SHIFT) + 1;
|