c_lexer 2.6.3.0.0 → 2.6.4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/c_lexer.gemspec +1 -1
- data/ext/lexer/lexer.c +10837 -10508
- data/ext/lexer/lexer.h +12 -2
- data/ext/lexer/lexer.rl +133 -6
- data/lib/c_lexer.rb +3 -0
- data/lib/c_lexer/version.rb +1 -1
- metadata +3 -3
data/ext/lexer/lexer.h
CHANGED
@@ -50,6 +50,8 @@ struct Lexer {
|
|
50
50
|
VALUE escape;
|
51
51
|
|
52
52
|
int cs_before_block_comment;
|
53
|
+
|
54
|
+
VALUE max_numparam_stack;
|
53
55
|
};
|
54
56
|
|
55
57
|
static void lexer_mark(void*);
|
@@ -229,6 +231,7 @@ VALUE tNEQ;
|
|
229
231
|
VALUE tNL;
|
230
232
|
VALUE tNMATCH;
|
231
233
|
VALUE tNTH_REF;
|
234
|
+
VALUE tNUMPARAM;
|
232
235
|
VALUE tOP_ASGN;
|
233
236
|
VALUE tOROP;
|
234
237
|
VALUE tPERCENT;
|
@@ -268,6 +271,7 @@ VALUE tXSTRING_BEG;
|
|
268
271
|
VALUE comment_klass;
|
269
272
|
VALUE diagnostic_klass;
|
270
273
|
VALUE range_klass;
|
274
|
+
VALUE max_numparam_stack_klass;
|
271
275
|
|
272
276
|
VALUE severity_error;
|
273
277
|
VALUE fatal;
|
@@ -281,6 +285,8 @@ VALUE cvar_name;
|
|
281
285
|
VALUE embedded_document;
|
282
286
|
VALUE empty_numeric;
|
283
287
|
VALUE escape_eof;
|
288
|
+
VALUE heredoc_id_ends_with_nl;
|
289
|
+
VALUE heredoc_id_has_newline;
|
284
290
|
VALUE incomplete_escape;
|
285
291
|
VALUE invalid_escape;
|
286
292
|
VALUE invalid_escape_use;
|
@@ -288,22 +294,26 @@ VALUE invalid_hex_escape;
|
|
288
294
|
VALUE invalid_octal;
|
289
295
|
VALUE invalid_unicode_escape;
|
290
296
|
VALUE ivar_name;
|
291
|
-
VALUE heredoc_id_ends_with_nl;
|
292
|
-
VALUE heredoc_id_has_newline;
|
297
|
+
VALUE leading_zero_in_numparam;
|
293
298
|
VALUE no_dot_digit_literal;
|
299
|
+
VALUE numparam_outside_block;
|
300
|
+
VALUE ordinary_param_defined;
|
294
301
|
VALUE prefix;
|
295
302
|
VALUE regexp_options;
|
296
303
|
VALUE string_eof;
|
304
|
+
VALUE too_large_numparam;
|
297
305
|
VALUE trailing_in_number;
|
298
306
|
VALUE unexpected;
|
299
307
|
VALUE unexpected_percent_str;
|
300
308
|
VALUE unicode_point_too_large;
|
309
|
+
VALUE unterminated_heredoc_id;
|
301
310
|
VALUE unterminated_unicode;
|
302
311
|
|
303
312
|
VALUE empty_array;
|
304
313
|
VALUE blank_string;
|
305
314
|
VALUE newline;
|
306
315
|
VALUE escaped_newline;
|
316
|
+
VALUE slash_r;
|
307
317
|
VALUE utf8_encoding;
|
308
318
|
VALUE cr_then_anything_to_eol;
|
309
319
|
VALUE crs_to_eol;
|
data/ext/lexer/lexer.rl
CHANGED
@@ -12,6 +12,7 @@
|
|
12
12
|
#define GET_LEXER(self) Data_Get_Struct(self, Lexer, lexer)
|
13
13
|
#define STATIC_ENV_DECLARED(name) \
|
14
14
|
lexer->static_env != Qnil && RTEST(rb_funcall(lexer->static_env, rb_intern("declared?"), 1, name))
|
15
|
+
#define NUMPARAM_MAX 100
|
15
16
|
|
16
17
|
#include "stack_state/cmdarg.h"
|
17
18
|
#include "stack_state/cond.h"
|
@@ -44,6 +45,7 @@ static VALUE lexer_alloc(VALUE klass)
|
|
44
45
|
lexer->comments = Qnil;
|
45
46
|
lexer->encoding = Qnil;
|
46
47
|
lexer->escape = Qnil;
|
48
|
+
lexer->max_numparam_stack = Qnil;
|
47
49
|
|
48
50
|
ss_stack_init(&lexer->cond_stack);
|
49
51
|
ss_stack_init(&lexer->cmdarg_stack);
|
@@ -66,6 +68,7 @@ static void lexer_mark(void *ptr)
|
|
66
68
|
rb_gc_mark(lexer->comments);
|
67
69
|
rb_gc_mark(lexer->encoding);
|
68
70
|
rb_gc_mark(lexer->escape);
|
71
|
+
rb_gc_mark(lexer->max_numparam_stack);
|
69
72
|
|
70
73
|
for (literal *lit = lexer->literal_stack.bottom; lit < lexer->literal_stack.top; lit++) {
|
71
74
|
rb_gc_mark(lit->buffer);
|
@@ -157,6 +160,8 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
|
|
157
160
|
|
158
161
|
lexer->cs_before_block_comment = lex_en_line_begin;
|
159
162
|
|
163
|
+
lexer->max_numparam_stack = rb_class_new_instance(0, NULL, max_numparam_stack_klass) ;
|
164
|
+
|
160
165
|
return self;
|
161
166
|
}
|
162
167
|
|
@@ -306,6 +311,18 @@ static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
|
|
306
311
|
return val;
|
307
312
|
}
|
308
313
|
|
314
|
+
static VALUE lexer_max_numparam_stack(VALUE self)
|
315
|
+
{
|
316
|
+
Lexer* lexer = GET_LEXER(self);
|
317
|
+
return lexer->max_numparam_stack;
|
318
|
+
}
|
319
|
+
|
320
|
+
static VALUE lexer_max_numparam(VALUE self)
|
321
|
+
{
|
322
|
+
Lexer* lexer = GET_LEXER(self);
|
323
|
+
return rb_funcall(lexer->max_numparam_stack, rb_intern("top"), 0);
|
324
|
+
}
|
325
|
+
|
309
326
|
static VALUE lexer_get_dedent_level(VALUE self)
|
310
327
|
{
|
311
328
|
Lexer* lexer = GET_LEXER(self);
|
@@ -335,6 +352,7 @@ static VALUE lexer_advance(VALUE self)
|
|
335
352
|
long ident_ts = 0, ident_te = 0;
|
336
353
|
long numeric_s = 0;
|
337
354
|
Data_Get_Struct(self, Lexer, lexer);
|
355
|
+
VALUE diag_msg;
|
338
356
|
|
339
357
|
if (RARRAY_LEN(lexer->token_queue) > 0)
|
340
358
|
return rb_ary_shift(lexer->token_queue);
|
@@ -788,9 +806,9 @@ void Init_lexer()
|
|
788
806
|
init_symbol(tASSOC);
|
789
807
|
init_symbol(tBACK_REF);
|
790
808
|
init_symbol(tBACK_REF2);
|
809
|
+
init_symbol(tBANG);
|
791
810
|
init_symbol(tBDOT2);
|
792
811
|
init_symbol(tBDOT3);
|
793
|
-
init_symbol(tBANG);
|
794
812
|
init_symbol(tCARET);
|
795
813
|
init_symbol(tCHARACTER);
|
796
814
|
init_symbol(tCMP);
|
@@ -841,6 +859,7 @@ void Init_lexer()
|
|
841
859
|
init_symbol(tNL);
|
842
860
|
init_symbol(tNMATCH);
|
843
861
|
init_symbol(tNTH_REF);
|
862
|
+
init_symbol(tNUMPARAM);
|
844
863
|
init_symbol(tOP_ASGN);
|
845
864
|
init_symbol(tOROP);
|
846
865
|
init_symbol(tPERCENT);
|
@@ -890,6 +909,8 @@ void Init_lexer()
|
|
890
909
|
init_symbol(embedded_document);
|
891
910
|
init_symbol(empty_numeric);
|
892
911
|
init_symbol(escape_eof);
|
912
|
+
init_symbol(heredoc_id_ends_with_nl);
|
913
|
+
init_symbol(heredoc_id_has_newline);
|
893
914
|
init_symbol(incomplete_escape);
|
894
915
|
init_symbol(invalid_escape);
|
895
916
|
init_symbol(invalid_escape_use);
|
@@ -897,16 +918,19 @@ void Init_lexer()
|
|
897
918
|
init_symbol(invalid_octal);
|
898
919
|
init_symbol(invalid_unicode_escape);
|
899
920
|
init_symbol(ivar_name);
|
900
|
-
init_symbol(heredoc_id_ends_with_nl);
|
901
|
-
init_symbol(heredoc_id_has_newline);
|
921
|
+
init_symbol(leading_zero_in_numparam);
|
902
922
|
init_symbol(no_dot_digit_literal);
|
923
|
+
init_symbol(numparam_outside_block);
|
924
|
+
init_symbol(ordinary_param_defined);
|
903
925
|
init_symbol(prefix);
|
904
926
|
init_symbol(regexp_options);
|
905
927
|
init_symbol(string_eof);
|
928
|
+
init_symbol(too_large_numparam);
|
906
929
|
init_symbol(trailing_in_number);
|
907
930
|
init_symbol(unexpected);
|
908
931
|
init_symbol(unexpected_percent_str);
|
909
932
|
init_symbol(unicode_point_too_large);
|
933
|
+
init_symbol(unterminated_heredoc_id);
|
910
934
|
init_symbol(unterminated_unicode);
|
911
935
|
|
912
936
|
VALUE m_Parser = rb_define_module("Parser");
|
@@ -958,12 +982,17 @@ void Init_lexer()
|
|
958
982
|
rb_define_method(c_Lexer, "source_buffer=", lexer_set_source_buffer, 1);
|
959
983
|
rb_define_method(c_Lexer, "force_utf32=", lexer_set_force_utf32, 1);
|
960
984
|
|
985
|
+
rb_define_method(c_Lexer, "max_numparam_stack", lexer_max_numparam_stack, 0);
|
986
|
+
rb_define_method(c_Lexer, "max_numparam", lexer_max_numparam, 0);
|
987
|
+
|
961
988
|
rb_define_attr(c_Lexer, "context", 1, 1);
|
962
989
|
|
963
990
|
VALUE m_Source = rb_const_get(m_Parser, rb_intern("Source"));
|
964
991
|
comment_klass = rb_const_get(m_Source, rb_intern("Comment"));
|
965
992
|
diagnostic_klass = rb_const_get(m_Parser, rb_intern("Diagnostic"));
|
966
993
|
range_klass = rb_const_get(m_Source, rb_intern("Range"));
|
994
|
+
VALUE lexer_class = rb_const_get(m_Parser, rb_intern("Lexer"));
|
995
|
+
max_numparam_stack_klass = rb_const_get(lexer_class, rb_intern("MaxNumparamStack"));
|
967
996
|
|
968
997
|
empty_array = rb_obj_freeze(rb_ary_new2(0));
|
969
998
|
rb_gc_register_address(&empty_array);
|
@@ -973,6 +1002,8 @@ void Init_lexer()
|
|
973
1002
|
rb_gc_register_address(&newline);
|
974
1003
|
escaped_newline = rb_obj_freeze(rb_str_new2("\\\n"));
|
975
1004
|
rb_gc_register_address(&escaped_newline);
|
1005
|
+
slash_r = rb_obj_freeze(rb_str_new2("\r"));
|
1006
|
+
rb_gc_register_address(&slash_r);
|
976
1007
|
|
977
1008
|
if (rb_const_defined(rb_cObject, rb_intern("Encoding"))) {
|
978
1009
|
VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
|
@@ -1170,6 +1201,21 @@ void Init_lexer()
|
|
1170
1201
|
empty_array);
|
1171
1202
|
}
|
1172
1203
|
|
1204
|
+
action read_post_meta_or_ctrl_char {
|
1205
|
+
VALUE codepoint = rb_funcall(lexer->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
|
1206
|
+
lexer->escape = rb_funcall(codepoint, rb_intern("chr"), 0);
|
1207
|
+
int codepoint_i = FIX2INT(rb_funcall(codepoint, rb_intern("ord"), 0));
|
1208
|
+
|
1209
|
+
if (
|
1210
|
+
lexer->version >= 27 && (
|
1211
|
+
(codepoint_i >= 0 && codepoint_i <= 8) ||
|
1212
|
+
(codepoint_i >= 14 && codepoint_i <= 31)
|
1213
|
+
)
|
1214
|
+
) {
|
1215
|
+
diagnostic(lexer, fatal, invalid_escape, Qnil, range(lexer, ts, te), empty_array);
|
1216
|
+
}
|
1217
|
+
}
|
1218
|
+
|
1173
1219
|
action slash_c_char {
|
1174
1220
|
char c = *RSTRING_PTR(lexer->escape) & 0x9f;
|
1175
1221
|
lexer->escape = rb_str_new(&c, 1);
|
@@ -1184,13 +1230,13 @@ void Init_lexer()
|
|
1184
1230
|
|
1185
1231
|
maybe_escaped_char = (
|
1186
1232
|
'\\' c_any %unescape_char
|
1187
|
-
| ( c_any - [\\] ) %
|
1233
|
+
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
|
1188
1234
|
);
|
1189
1235
|
|
1190
1236
|
maybe_escaped_ctrl_char = (
|
1191
1237
|
'\\' c_any %unescape_char %slash_c_char
|
1192
1238
|
| '?' % { lexer->escape = rb_str_new2("\x7f"); }
|
1193
|
-
| ( c_any - [\\?] ) %
|
1239
|
+
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
|
1194
1240
|
);
|
1195
1241
|
|
1196
1242
|
escape = (
|
@@ -1636,6 +1682,47 @@ void Init_lexer()
|
|
1636
1682
|
fnext *stack[--top]; fbreak;
|
1637
1683
|
};
|
1638
1684
|
|
1685
|
+
'@' [0-9]+
|
1686
|
+
=> {
|
1687
|
+
VALUE token = tok(lexer, ts, te);
|
1688
|
+
|
1689
|
+
if (lexer->version < 27) {
|
1690
|
+
VALUE hash = rb_hash_new();
|
1691
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("name")), token);
|
1692
|
+
diagnostic(lexer, severity_error, ivar_name, hash, range(lexer, ts, te), empty_array);
|
1693
|
+
}
|
1694
|
+
|
1695
|
+
VALUE value = rb_funcall(token, rb_intern("[]"), 1, rb_range_new(INT2NUM(1), INT2NUM(-1), 0));
|
1696
|
+
VALUE int_value = rb_funcall(value, rb_intern("to_i"), 0);
|
1697
|
+
|
1698
|
+
if (*RSTRING_PTR(value) == '0') {
|
1699
|
+
diagnostic(lexer, severity_error, leading_zero_in_numparam, Qnil, range(lexer, ts, te), empty_array);
|
1700
|
+
}
|
1701
|
+
|
1702
|
+
if (FIX2INT(int_value) > NUMPARAM_MAX) {
|
1703
|
+
diagnostic(lexer, severity_error, too_large_numparam, Qnil, range(lexer, ts, te), empty_array);
|
1704
|
+
}
|
1705
|
+
|
1706
|
+
VALUE context = rb_iv_get(self, "@context");
|
1707
|
+
int in_block = RTEST(rb_funcall(context, rb_intern("in_block?"), 0));
|
1708
|
+
int in_lambda = RTEST(rb_funcall(context, rb_intern("in_lambda?"), 0));
|
1709
|
+
|
1710
|
+
if (!in_block && !in_lambda) {
|
1711
|
+
diagnostic(lexer, severity_error, numparam_outside_block, Qnil, range(lexer, ts, te), empty_array);
|
1712
|
+
}
|
1713
|
+
|
1714
|
+
VALUE max_numparam_stack = lexer->max_numparam_stack;
|
1715
|
+
int can_have_numparams = RTEST(rb_funcall(max_numparam_stack, rb_intern("can_have_numparams?"), 0));
|
1716
|
+
if (!can_have_numparams) {
|
1717
|
+
diagnostic(lexer, severity_error, ordinary_param_defined, Qnil, range(lexer, ts, te), empty_array);
|
1718
|
+
}
|
1719
|
+
|
1720
|
+
rb_funcall(max_numparam_stack, rb_intern("register"), 1, int_value);
|
1721
|
+
|
1722
|
+
emit_token(lexer, tNUMPARAM, tok(lexer, ts + 1, te), ts, te);
|
1723
|
+
fnext *stack[--top]; fbreak;
|
1724
|
+
};
|
1725
|
+
|
1639
1726
|
instance_var_v => {
|
1640
1727
|
VALUE str = tok(lexer, ts, te);
|
1641
1728
|
|
@@ -1960,7 +2047,15 @@ void Init_lexer()
|
|
1960
2047
|
|
1961
2048
|
VALUE delimiter = tok(lexer, rng_s, rng_e);
|
1962
2049
|
|
1963
|
-
if (lexer->version >= 24) {
|
2050
|
+
if (lexer->version >= 27) {
|
2051
|
+
int newlines_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline));
|
2052
|
+
int slash_r_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, slash_r));
|
2053
|
+
|
2054
|
+
if (newlines_count > 0 || slash_r_count > 0) {
|
2055
|
+
diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil,
|
2056
|
+
range(lexer, ts, ts + 1), empty_array);
|
2057
|
+
}
|
2058
|
+
} else if (lexer->version >= 24) {
|
1964
2059
|
if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
|
1965
2060
|
if (str_end_with_p(delimiter, "\n")) {
|
1966
2061
|
diagnostic(lexer, warning, heredoc_id_ends_with_nl, Qnil,
|
@@ -1985,6 +2080,21 @@ void Init_lexer()
|
|
1985
2080
|
}
|
1986
2081
|
};
|
1987
2082
|
|
2083
|
+
# Escaped unterminated heredoc start
|
2084
|
+
# <<'END | <<"END | <<`END |
|
2085
|
+
# <<-'END | <<-"END | <<-`END |
|
2086
|
+
# <<~'END | <<~"END | <<~`END
|
2087
|
+
#
|
2088
|
+
# If the heredoc is terminated the rule above should handle it
|
2089
|
+
'<<' [~\-]?
|
2090
|
+
('"' (any - c_nl - '"')*
|
2091
|
+
|"'" (any - c_nl - "'")*
|
2092
|
+
|"`" (any - c_nl - "`")
|
2093
|
+
)
|
2094
|
+
=> {
|
2095
|
+
diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil, range(lexer, ts, ts + 1), empty_array);
|
2096
|
+
};
|
2097
|
+
|
1988
2098
|
':' ('&&' | '||') => {
|
1989
2099
|
fhold; fhold;
|
1990
2100
|
emit_token(lexer, tSYMBEG, tok(lexer, ts, ts + 1), ts, ts + 1);
|
@@ -2015,6 +2125,23 @@ void Init_lexer()
|
|
2015
2125
|
fnext expr_end; fbreak;
|
2016
2126
|
};
|
2017
2127
|
|
2128
|
+
':' ( '@' %{ tm = p - 1; diag_msg = ivar_name; }
|
2129
|
+
| '@@' %{ tm = p - 2; diag_msg = cvar_name; }
|
2130
|
+
) [0-9]*
|
2131
|
+
=> {
|
2132
|
+
if (lexer->version >= 27) {
|
2133
|
+
VALUE hash = rb_hash_new();
|
2134
|
+
rb_hash_aset(hash, ID2SYM(rb_intern("name")), tok(lexer, tm, te));
|
2135
|
+
diagnostic(lexer, severity_error, diag_msg, hash, range(lexer, tm, te), empty_array);
|
2136
|
+
} else {
|
2137
|
+
emit_token(lexer, tCOLON, tok(lexer, ts, ts + 1), ts, ts + 1);
|
2138
|
+
p = ts;
|
2139
|
+
}
|
2140
|
+
|
2141
|
+
fnext expr_end; fbreak;
|
2142
|
+
};
|
2143
|
+
|
2144
|
+
|
2018
2145
|
'?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
|
2019
2146
|
| (c_any - c_space_nl - e_bs) % { lexer->escape = Qnil; }
|
2020
2147
|
) => {
|
data/lib/c_lexer.rb
CHANGED
@@ -74,6 +74,7 @@ module Parser
|
|
74
74
|
@lexer = ::Parser::CLexer.new(version)
|
75
75
|
@lexer.diagnostics = @diagnostics
|
76
76
|
@lexer.static_env = @static_env
|
77
|
+
@lexer.context = @context
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
@@ -83,6 +84,7 @@ module Parser
|
|
83
84
|
@lexer = ::Parser::CLexer.new(version)
|
84
85
|
@lexer.diagnostics = @diagnostics
|
85
86
|
@lexer.static_env = @static_env
|
87
|
+
@lexer.context = @context
|
86
88
|
end
|
87
89
|
end
|
88
90
|
|
@@ -92,6 +94,7 @@ module Parser
|
|
92
94
|
@lexer = ::Parser::CLexer.new(version)
|
93
95
|
@lexer.diagnostics = @diagnostics
|
94
96
|
@lexer.static_env = @static_env
|
97
|
+
@lexer.context = @context
|
95
98
|
end
|
96
99
|
end
|
97
100
|
end
|
data/lib/c_lexer/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: c_lexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.6.3.0.0
|
4
|
+
version: 2.6.4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ilya Bylich
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 2.6.3.0
|
33
|
+
version: 2.6.4.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 2.6.3.0
|
40
|
+
version: 2.6.4.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: bundler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|