c_lexer 2.6.3.0.0 → 2.6.4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/c_lexer.gemspec +1 -1
- data/ext/lexer/lexer.c +10837 -10508
- data/ext/lexer/lexer.h +12 -2
- data/ext/lexer/lexer.rl +133 -6
- data/lib/c_lexer.rb +3 -0
- data/lib/c_lexer/version.rb +1 -1
- metadata +3 -3
data/ext/lexer/lexer.h
CHANGED
@@ -50,6 +50,8 @@ struct Lexer {
|
|
50
50
|
VALUE escape;
|
51
51
|
|
52
52
|
int cs_before_block_comment;
|
53
|
+
|
54
|
+
VALUE max_numparam_stack;
|
53
55
|
};
|
54
56
|
|
55
57
|
static void lexer_mark(void*);
|
@@ -229,6 +231,7 @@ VALUE tNEQ;
|
|
229
231
|
VALUE tNL;
|
230
232
|
VALUE tNMATCH;
|
231
233
|
VALUE tNTH_REF;
|
234
|
+
VALUE tNUMPARAM;
|
232
235
|
VALUE tOP_ASGN;
|
233
236
|
VALUE tOROP;
|
234
237
|
VALUE tPERCENT;
|
@@ -268,6 +271,7 @@ VALUE tXSTRING_BEG;
|
|
268
271
|
VALUE comment_klass;
|
269
272
|
VALUE diagnostic_klass;
|
270
273
|
VALUE range_klass;
|
274
|
+
VALUE max_numparam_stack_klass;
|
271
275
|
|
272
276
|
VALUE severity_error;
|
273
277
|
VALUE fatal;
|
@@ -281,6 +285,8 @@ VALUE cvar_name;
|
|
281
285
|
VALUE embedded_document;
|
282
286
|
VALUE empty_numeric;
|
283
287
|
VALUE escape_eof;
|
288
|
+
VALUE heredoc_id_ends_with_nl;
|
289
|
+
VALUE heredoc_id_has_newline;
|
284
290
|
VALUE incomplete_escape;
|
285
291
|
VALUE invalid_escape;
|
286
292
|
VALUE invalid_escape_use;
|
@@ -288,22 +294,26 @@ VALUE invalid_hex_escape;
|
|
288
294
|
VALUE invalid_octal;
|
289
295
|
VALUE invalid_unicode_escape;
|
290
296
|
VALUE ivar_name;
|
291
|
-
VALUE heredoc_id_ends_with_nl;
|
292
|
-
VALUE heredoc_id_has_newline;
|
297
|
+
VALUE leading_zero_in_numparam;
|
293
298
|
VALUE no_dot_digit_literal;
|
299
|
+
VALUE numparam_outside_block;
|
300
|
+
VALUE ordinary_param_defined;
|
294
301
|
VALUE prefix;
|
295
302
|
VALUE regexp_options;
|
296
303
|
VALUE string_eof;
|
304
|
+
VALUE too_large_numparam;
|
297
305
|
VALUE trailing_in_number;
|
298
306
|
VALUE unexpected;
|
299
307
|
VALUE unexpected_percent_str;
|
300
308
|
VALUE unicode_point_too_large;
|
309
|
+
VALUE unterminated_heredoc_id;
|
301
310
|
VALUE unterminated_unicode;
|
302
311
|
|
303
312
|
VALUE empty_array;
|
304
313
|
VALUE blank_string;
|
305
314
|
VALUE newline;
|
306
315
|
VALUE escaped_newline;
|
316
|
+
VALUE slash_r;
|
307
317
|
VALUE utf8_encoding;
|
308
318
|
VALUE cr_then_anything_to_eol;
|
309
319
|
VALUE crs_to_eol;
|
data/ext/lexer/lexer.rl
CHANGED
@@ -12,6 +12,7 @@
|
|
12
12
|
#define GET_LEXER(self) Data_Get_Struct(self, Lexer, lexer)
|
13
13
|
#define STATIC_ENV_DECLARED(name) \
|
14
14
|
lexer->static_env != Qnil && RTEST(rb_funcall(lexer->static_env, rb_intern("declared?"), 1, name))
|
15
|
+
#define NUMPARAM_MAX 100
|
15
16
|
|
16
17
|
#include "stack_state/cmdarg.h"
|
17
18
|
#include "stack_state/cond.h"
|
@@ -44,6 +45,7 @@ static VALUE lexer_alloc(VALUE klass)
|
|
44
45
|
lexer->comments = Qnil;
|
45
46
|
lexer->encoding = Qnil;
|
46
47
|
lexer->escape = Qnil;
|
48
|
+
lexer->max_numparam_stack = Qnil;
|
47
49
|
|
48
50
|
ss_stack_init(&lexer->cond_stack);
|
49
51
|
ss_stack_init(&lexer->cmdarg_stack);
|
@@ -66,6 +68,7 @@ static void lexer_mark(void *ptr)
|
|
66
68
|
rb_gc_mark(lexer->comments);
|
67
69
|
rb_gc_mark(lexer->encoding);
|
68
70
|
rb_gc_mark(lexer->escape);
|
71
|
+
rb_gc_mark(lexer->max_numparam_stack);
|
69
72
|
|
70
73
|
for (literal *lit = lexer->literal_stack.bottom; lit < lexer->literal_stack.top; lit++) {
|
71
74
|
rb_gc_mark(lit->buffer);
|
@@ -157,6 +160,8 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
|
157
160
|
|
158
161
|
lexer->cs_before_block_comment = lex_en_line_begin;
|
159
162
|
|
163
|
+
lexer->max_numparam_stack = rb_class_new_instance(0, NULL, max_numparam_stack_klass) ;
|
164
|
+
|
160
165
|
return self;
|
161
166
|
}
|
162
167
|
|
@@ -306,6 +311,18 @@ static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
|
|
306
311
|
return val;
|
307
312
|
}
|
308
313
|
|
314
|
+
static VALUE lexer_max_numparam_stack(VALUE self)
|
315
|
+
{
|
316
|
+
Lexer* lexer = GET_LEXER(self);
|
317
|
+
return lexer->max_numparam_stack;
|
318
|
+
}
|
319
|
+
|
320
|
+
static VALUE lexer_max_numparam(VALUE self)
|
321
|
+
{
|
322
|
+
Lexer* lexer = GET_LEXER(self);
|
323
|
+
return rb_funcall(lexer->max_numparam_stack, rb_intern("top"), 0);
|
324
|
+
}
|
325
|
+
|
309
326
|
static VALUE lexer_get_dedent_level(VALUE self)
|
310
327
|
{
|
311
328
|
Lexer* lexer = GET_LEXER(self);
|
@@ -335,6 +352,7 @@ static VALUE lexer_advance(VALUE self)
|
|
335
352
|
long ident_ts = 0, ident_te = 0;
|
336
353
|
long numeric_s = 0;
|
337
354
|
Data_Get_Struct(self, Lexer, lexer);
|
355
|
+
VALUE diag_msg;
|
338
356
|
|
339
357
|
if (RARRAY_LEN(lexer->token_queue) > 0)
|
340
358
|
return rb_ary_shift(lexer->token_queue);
|
@@ -788,9 +806,9 @@ void Init_lexer()
|
|
788
806
|
init_symbol(tASSOC);
|
789
807
|
init_symbol(tBACK_REF);
|
790
808
|
init_symbol(tBACK_REF2);
|
809
|
+
init_symbol(tBANG);
|
791
810
|
init_symbol(tBDOT2);
|
792
811
|
init_symbol(tBDOT3);
|
793
|
-
init_symbol(tBANG);
|
794
812
|
init_symbol(tCARET);
|
795
813
|
init_symbol(tCHARACTER);
|
796
814
|
init_symbol(tCMP);
|
@@ -841,6 +859,7 @@ void Init_lexer()
|
|
841
859
|
init_symbol(tNL);
|
842
860
|
init_symbol(tNMATCH);
|
843
861
|
init_symbol(tNTH_REF);
|
862
|
+
init_symbol(tNUMPARAM);
|
844
863
|
init_symbol(tOP_ASGN);
|
845
864
|
init_symbol(tOROP);
|
846
865
|
init_symbol(tPERCENT);
|
@@ -890,6 +909,8 @@ void Init_lexer()
|
|
890
909
|
init_symbol(embedded_document);
|
891
910
|
init_symbol(empty_numeric);
|
892
911
|
init_symbol(escape_eof);
|
912
|
+
init_symbol(heredoc_id_ends_with_nl);
|
913
|
+
init_symbol(heredoc_id_has_newline);
|
893
914
|
init_symbol(incomplete_escape);
|
894
915
|
init_symbol(invalid_escape);
|
895
916
|
init_symbol(invalid_escape_use);
|
@@ -897,16 +918,19 @@ void Init_lexer()
|
|
897
918
|
init_symbol(invalid_octal);
|
898
919
|
init_symbol(invalid_unicode_escape);
|
899
920
|
init_symbol(ivar_name);
|
900
|
-
init_symbol(heredoc_id_ends_with_nl);
|
901
|
-
init_symbol(heredoc_id_has_newline);
|
921
|
+
init_symbol(leading_zero_in_numparam);
|
902
922
|
init_symbol(no_dot_digit_literal);
|
923
|
+
init_symbol(numparam_outside_block);
|
924
|
+
init_symbol(ordinary_param_defined);
|
903
925
|
init_symbol(prefix);
|
904
926
|
init_symbol(regexp_options);
|
905
927
|
init_symbol(string_eof);
|
928
|
+
init_symbol(too_large_numparam);
|
906
929
|
init_symbol(trailing_in_number);
|
907
930
|
init_symbol(unexpected);
|
908
931
|
init_symbol(unexpected_percent_str);
|
909
932
|
init_symbol(unicode_point_too_large);
|
933
|
+
init_symbol(unterminated_heredoc_id);
|
910
934
|
init_symbol(unterminated_unicode);
|
911
935
|
|
912
936
|
VALUE m_Parser = rb_define_module("Parser");
|
@@ -958,12 +982,17 @@ void Init_lexer()
|
|
958
982
|
rb_define_method(c_Lexer, "source_buffer=", lexer_set_source_buffer, 1);
|
959
983
|
rb_define_method(c_Lexer, "force_utf32=", lexer_set_force_utf32, 1);
|
960
984
|
|
985
|
+
rb_define_method(c_Lexer, "max_numparam_stack", lexer_max_numparam_stack, 0);
|
986
|
+
rb_define_method(c_Lexer, "max_numparam", lexer_max_numparam, 0);
|
987
|
+
|
961
988
|
rb_define_attr(c_Lexer, "context", 1, 1);
|
962
989
|
|
963
990
|
VALUE m_Source = rb_const_get(m_Parser, rb_intern("Source"));
|
964
991
|
comment_klass = rb_const_get(m_Source, rb_intern("Comment"));
|
965
992
|
diagnostic_klass = rb_const_get(m_Parser, rb_intern("Diagnostic"));
|
966
993
|
range_klass = rb_const_get(m_Source, rb_intern("Range"));
|
994
|
+
VALUE lexer_class = rb_const_get(m_Parser, rb_intern("Lexer"));
|
995
|
+
max_numparam_stack_klass = rb_const_get(lexer_class, rb_intern("MaxNumparamStack"));
|
967
996
|
|
968
997
|
empty_array = rb_obj_freeze(rb_ary_new2(0));
|
969
998
|
rb_gc_register_address(&empty_array);
|
@@ -973,6 +1002,8 @@ void Init_lexer()
|
|
973
1002
|
rb_gc_register_address(&newline);
|
974
1003
|
escaped_newline = rb_obj_freeze(rb_str_new2("\\\n"));
|
975
1004
|
rb_gc_register_address(&escaped_newline);
|
1005
|
+
slash_r = rb_obj_freeze(rb_str_new2("\r"));
|
1006
|
+
rb_gc_register_address(&slash_r);
|
976
1007
|
|
977
1008
|
if (rb_const_defined(rb_cObject, rb_intern("Encoding"))) {
|
978
1009
|
VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
|
@@ -1170,6 +1201,21 @@ void Init_lexer()
|
|
1170
1201
|
empty_array);
|
1171
1202
|
}
|
1172
1203
|
|
1204
|
+
action read_post_meta_or_ctrl_char {
|
1205
|
+
VALUE codepoint = rb_funcall(lexer->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
|
1206
|
+
lexer->escape = rb_funcall(codepoint, rb_intern("chr"), 0);
|
1207
|
+
int codepoint_i = FIX2INT(rb_funcall(codepoint, rb_intern("ord"), 0));
|
1208
|
+
|
1209
|
+
if (
|
1210
|
+
lexer->version >= 27 && (
|
1211
|
+
(codepoint_i >= 0 && codepoint_i <= 8) ||
|
1212
|
+
(codepoint_i >= 14 && codepoint_i <= 31)
|
1213
|
+
)
|
1214
|
+
) {
|
1215
|
+
diagnostic(lexer, fatal, invalid_escape, Qnil, range(lexer, ts, te), empty_array);
|
1216
|
+
}
|
1217
|
+
}
|
1218
|
+
|
1173
1219
|
action slash_c_char {
|
1174
1220
|
char c = *RSTRING_PTR(lexer->escape) & 0x9f;
|
1175
1221
|
lexer->escape = rb_str_new(&c, 1);
|
@@ -1184,13 +1230,13 @@ void Init_lexer()
|
|
1184
1230
|
|
1185
1231
|
maybe_escaped_char = (
|
1186
1232
|
'\\' c_any %unescape_char
|
1187
|
-
| ( c_any - [\\] ) %
|
1233
|
+
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
|
1188
1234
|
);
|
1189
1235
|
|
1190
1236
|
maybe_escaped_ctrl_char = (
|
1191
1237
|
'\\' c_any %unescape_char %slash_c_char
|
1192
1238
|
| '?' % { lexer->escape = rb_str_new2("\x7f"); }
|
1193
|
-
| ( c_any - [\\?] ) %
|
1239
|
+
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
|
1194
1240
|
);
|
1195
1241
|
|
1196
1242
|
escape = (
|
@@ -1636,6 +1682,47 @@ void Init_lexer()
|
|
1636
1682
|
fnext *stack[--top]; fbreak;
|
1637
1683
|
};
|
1638
1684
|
|
1685
|
+
'@' [0-9]+
|
1686
|
+
=> {
|
1687
|
+
VALUE token = tok(lexer, ts, te);
|
1688
|
+
|
1689
|
+
if (lexer->version < 27) {
|
1690
|
+
VALUE hash = rb_hash_new();
|
1691
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("name")), token);
|
1692
|
+
diagnostic(lexer, severity_error, ivar_name, hash, range(lexer, ts, te), empty_array);
|
1693
|
+
}
|
1694
|
+
|
1695
|
+
VALUE value = rb_funcall(token, rb_intern("[]"), 1, rb_range_new(INT2NUM(1), INT2NUM(-1), 0));
|
1696
|
+
VALUE int_value = rb_funcall(value, rb_intern("to_i"), 0);
|
1697
|
+
|
1698
|
+
if (*RSTRING_PTR(value) == '0') {
|
1699
|
+
diagnostic(lexer, severity_error, leading_zero_in_numparam, Qnil, range(lexer, ts, te), empty_array);
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
if (FIX2INT(int_value) > NUMPARAM_MAX) {
|
1703
|
+
diagnostic(lexer, severity_error, too_large_numparam, Qnil, range(lexer, ts, te), empty_array);
|
1704
|
+
}
|
1705
|
+
|
1706
|
+
VALUE context = rb_iv_get(self, "@context");
|
1707
|
+
int in_block = RTEST(rb_funcall(context, rb_intern("in_block?"), 0));
|
1708
|
+
int in_lambda = RTEST(rb_funcall(context, rb_intern("in_lambda?"), 0));
|
1709
|
+
|
1710
|
+
if (!in_block && !in_lambda) {
|
1711
|
+
diagnostic(lexer, severity_error, numparam_outside_block, Qnil, range(lexer, ts, te), empty_array);
|
1712
|
+
}
|
1713
|
+
|
1714
|
+
VALUE max_numparam_stack = lexer->max_numparam_stack;
|
1715
|
+
int can_have_numparams = RTEST(rb_funcall(max_numparam_stack, rb_intern("can_have_numparams?"), 0));
|
1716
|
+
if (!can_have_numparams) {
|
1717
|
+
diagnostic(lexer, severity_error, ordinary_param_defined, Qnil, range(lexer, ts, te), empty_array);
|
1718
|
+
}
|
1719
|
+
|
1720
|
+
rb_funcall(max_numparam_stack, rb_intern("register"), 1, int_value);
|
1721
|
+
|
1722
|
+
emit_token(lexer, tNUMPARAM, tok(lexer, ts + 1, te), ts, te);
|
1723
|
+
fnext *stack[--top]; fbreak;
|
1724
|
+
};
|
1725
|
+
|
1639
1726
|
instance_var_v => {
|
1640
1727
|
VALUE str = tok(lexer, ts, te);
|
1641
1728
|
|
@@ -1960,7 +2047,15 @@ void Init_lexer()
|
|
1960
2047
|
|
1961
2048
|
VALUE delimiter = tok(lexer, rng_s, rng_e);
|
1962
2049
|
|
1963
|
-
if (lexer->version >= 24) {
|
2050
|
+
if (lexer->version >= 27) {
|
2051
|
+
int newlines_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline));
|
2052
|
+
int slash_r_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, slash_r));
|
2053
|
+
|
2054
|
+
if (newlines_count > 0 || slash_r_count > 0) {
|
2055
|
+
diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil,
|
2056
|
+
range(lexer, ts, ts + 1), empty_array);
|
2057
|
+
}
|
2058
|
+
} else if (lexer->version >= 24) {
|
1964
2059
|
if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
|
1965
2060
|
if (str_end_with_p(delimiter, "\n")) {
|
1966
2061
|
diagnostic(lexer, warning, heredoc_id_ends_with_nl, Qnil,
|
@@ -1985,6 +2080,21 @@ void Init_lexer()
|
|
1985
2080
|
}
|
1986
2081
|
};
|
1987
2082
|
|
2083
|
+
# Escaped unterminated heredoc start
|
2084
|
+
# <<'END | <<"END | <<`END |
|
2085
|
+
# <<-'END | <<-"END | <<-`END |
|
2086
|
+
# <<~'END | <<~"END | <<~`END
|
2087
|
+
#
|
2088
|
+
# If the heredoc is terminated the rule above should handle it
|
2089
|
+
'<<' [~\-]?
|
2090
|
+
('"' (any - c_nl - '"')*
|
2091
|
+
|"'" (any - c_nl - "'")*
|
2092
|
+
|"`" (any - c_nl - "`")
|
2093
|
+
)
|
2094
|
+
=> {
|
2095
|
+
diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil, range(lexer, ts, ts + 1), empty_array);
|
2096
|
+
};
|
2097
|
+
|
1988
2098
|
':' ('&&' | '||') => {
|
1989
2099
|
fhold; fhold;
|
1990
2100
|
emit_token(lexer, tSYMBEG, tok(lexer, ts, ts + 1), ts, ts + 1);
|
@@ -2015,6 +2125,23 @@ void Init_lexer()
|
|
2015
2125
|
fnext expr_end; fbreak;
|
2016
2126
|
};
|
2017
2127
|
|
2128
|
+
':' ( '@' %{ tm = p - 1; diag_msg = ivar_name; }
|
2129
|
+
| '@@' %{ tm = p - 2; diag_msg = cvar_name; }
|
2130
|
+
) [0-9]*
|
2131
|
+
=> {
|
2132
|
+
if (lexer->version >= 27) {
|
2133
|
+
VALUE hash = rb_hash_new();
|
2134
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("name")), tok(lexer, tm, te));
|
2135
|
+
diagnostic(lexer, severity_error, diag_msg, hash, range(lexer, tm, te), empty_array);
|
2136
|
+
} else {
|
2137
|
+
emit_token(lexer, tCOLON, tok(lexer, ts, ts + 1), ts, ts + 1);
|
2138
|
+
p = ts;
|
2139
|
+
}
|
2140
|
+
|
2141
|
+
fnext expr_end; fbreak;
|
2142
|
+
};
|
2143
|
+
|
2144
|
+
|
2018
2145
|
'?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
|
2019
2146
|
| (c_any - c_space_nl - e_bs) % { lexer->escape = Qnil; }
|
2020
2147
|
) => {
|
data/lib/c_lexer.rb
CHANGED
@@ -74,6 +74,7 @@ module Parser
|
|
74
74
|
@lexer = ::Parser::CLexer.new(version)
|
75
75
|
@lexer.diagnostics = @diagnostics
|
76
76
|
@lexer.static_env = @static_env
|
77
|
+
@lexer.context = @context
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
@@ -83,6 +84,7 @@ module Parser
|
|
83
84
|
@lexer = ::Parser::CLexer.new(version)
|
84
85
|
@lexer.diagnostics = @diagnostics
|
85
86
|
@lexer.static_env = @static_env
|
87
|
+
@lexer.context = @context
|
86
88
|
end
|
87
89
|
end
|
88
90
|
|
@@ -92,6 +94,7 @@ module Parser
|
|
92
94
|
@lexer = ::Parser::CLexer.new(version)
|
93
95
|
@lexer.diagnostics = @diagnostics
|
94
96
|
@lexer.static_env = @static_env
|
97
|
+
@lexer.context = @context
|
95
98
|
end
|
96
99
|
end
|
97
100
|
end
|
data/lib/c_lexer/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: c_lexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.6.3.0.0
|
4
|
+
version: 2.6.4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Bylich
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.6.3.0
|
33
|
+
version: 2.6.4.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 2.6.3.0
|
40
|
+
version: 2.6.4.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|