c_lexer 2.6.3.0.0 → 2.6.4.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/lexer/lexer.h CHANGED
@@ -50,6 +50,8 @@ struct Lexer {
50
50
  VALUE escape;
51
51
 
52
52
  int cs_before_block_comment;
53
+
54
+ VALUE max_numparam_stack;
53
55
  };
54
56
 
55
57
  static void lexer_mark(void*);
@@ -229,6 +231,7 @@ VALUE tNEQ;
229
231
  VALUE tNL;
230
232
  VALUE tNMATCH;
231
233
  VALUE tNTH_REF;
234
+ VALUE tNUMPARAM;
232
235
  VALUE tOP_ASGN;
233
236
  VALUE tOROP;
234
237
  VALUE tPERCENT;
@@ -268,6 +271,7 @@ VALUE tXSTRING_BEG;
268
271
  VALUE comment_klass;
269
272
  VALUE diagnostic_klass;
270
273
  VALUE range_klass;
274
+ VALUE max_numparam_stack_klass;
271
275
 
272
276
  VALUE severity_error;
273
277
  VALUE fatal;
@@ -281,6 +285,8 @@ VALUE cvar_name;
281
285
  VALUE embedded_document;
282
286
  VALUE empty_numeric;
283
287
  VALUE escape_eof;
288
+ VALUE heredoc_id_ends_with_nl;
289
+ VALUE heredoc_id_has_newline;
284
290
  VALUE incomplete_escape;
285
291
  VALUE invalid_escape;
286
292
  VALUE invalid_escape_use;
@@ -288,22 +294,26 @@ VALUE invalid_hex_escape;
288
294
  VALUE invalid_octal;
289
295
  VALUE invalid_unicode_escape;
290
296
  VALUE ivar_name;
291
- VALUE heredoc_id_ends_with_nl;
292
- VALUE heredoc_id_has_newline;
297
+ VALUE leading_zero_in_numparam;
293
298
  VALUE no_dot_digit_literal;
299
+ VALUE numparam_outside_block;
300
+ VALUE ordinary_param_defined;
294
301
  VALUE prefix;
295
302
  VALUE regexp_options;
296
303
  VALUE string_eof;
304
+ VALUE too_large_numparam;
297
305
  VALUE trailing_in_number;
298
306
  VALUE unexpected;
299
307
  VALUE unexpected_percent_str;
300
308
  VALUE unicode_point_too_large;
309
+ VALUE unterminated_heredoc_id;
301
310
  VALUE unterminated_unicode;
302
311
 
303
312
  VALUE empty_array;
304
313
  VALUE blank_string;
305
314
  VALUE newline;
306
315
  VALUE escaped_newline;
316
+ VALUE slash_r;
307
317
  VALUE utf8_encoding;
308
318
  VALUE cr_then_anything_to_eol;
309
319
  VALUE crs_to_eol;
data/ext/lexer/lexer.rl CHANGED
@@ -12,6 +12,7 @@
12
12
  #define GET_LEXER(self) Data_Get_Struct(self, Lexer, lexer)
13
13
  #define STATIC_ENV_DECLARED(name) \
14
14
  lexer->static_env != Qnil && RTEST(rb_funcall(lexer->static_env, rb_intern("declared?"), 1, name))
15
+ #define NUMPARAM_MAX 100
15
16
 
16
17
  #include "stack_state/cmdarg.h"
17
18
  #include "stack_state/cond.h"
@@ -44,6 +45,7 @@ static VALUE lexer_alloc(VALUE klass)
44
45
  lexer->comments = Qnil;
45
46
  lexer->encoding = Qnil;
46
47
  lexer->escape = Qnil;
48
+ lexer->max_numparam_stack = Qnil;
47
49
 
48
50
  ss_stack_init(&lexer->cond_stack);
49
51
  ss_stack_init(&lexer->cmdarg_stack);
@@ -66,6 +68,7 @@ static void lexer_mark(void *ptr)
66
68
  rb_gc_mark(lexer->comments);
67
69
  rb_gc_mark(lexer->encoding);
68
70
  rb_gc_mark(lexer->escape);
71
+ rb_gc_mark(lexer->max_numparam_stack);
69
72
 
70
73
  for (literal *lit = lexer->literal_stack.bottom; lit < lexer->literal_stack.top; lit++) {
71
74
  rb_gc_mark(lit->buffer);
@@ -157,6 +160,8 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
157
160
 
158
161
  lexer->cs_before_block_comment = lex_en_line_begin;
159
162
 
163
+ lexer->max_numparam_stack = rb_class_new_instance(0, NULL, max_numparam_stack_klass) ;
164
+
160
165
  return self;
161
166
  }
162
167
 
@@ -306,6 +311,18 @@ static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
306
311
  return val;
307
312
  }
308
313
 
314
+ static VALUE lexer_max_numparam_stack(VALUE self)
315
+ {
316
+ Lexer* lexer = GET_LEXER(self);
317
+ return lexer->max_numparam_stack;
318
+ }
319
+
320
+ static VALUE lexer_max_numparam(VALUE self)
321
+ {
322
+ Lexer* lexer = GET_LEXER(self);
323
+ return rb_funcall(lexer->max_numparam_stack, rb_intern("top"), 0);
324
+ }
325
+
309
326
  static VALUE lexer_get_dedent_level(VALUE self)
310
327
  {
311
328
  Lexer* lexer = GET_LEXER(self);
@@ -335,6 +352,7 @@ static VALUE lexer_advance(VALUE self)
335
352
  long ident_ts = 0, ident_te = 0;
336
353
  long numeric_s = 0;
337
354
  Data_Get_Struct(self, Lexer, lexer);
355
+ VALUE diag_msg;
338
356
 
339
357
  if (RARRAY_LEN(lexer->token_queue) > 0)
340
358
  return rb_ary_shift(lexer->token_queue);
@@ -788,9 +806,9 @@ void Init_lexer()
788
806
  init_symbol(tASSOC);
789
807
  init_symbol(tBACK_REF);
790
808
  init_symbol(tBACK_REF2);
809
+ init_symbol(tBANG);
791
810
  init_symbol(tBDOT2);
792
811
  init_symbol(tBDOT3);
793
- init_symbol(tBANG);
794
812
  init_symbol(tCARET);
795
813
  init_symbol(tCHARACTER);
796
814
  init_symbol(tCMP);
@@ -841,6 +859,7 @@ void Init_lexer()
841
859
  init_symbol(tNL);
842
860
  init_symbol(tNMATCH);
843
861
  init_symbol(tNTH_REF);
862
+ init_symbol(tNUMPARAM);
844
863
  init_symbol(tOP_ASGN);
845
864
  init_symbol(tOROP);
846
865
  init_symbol(tPERCENT);
@@ -890,6 +909,8 @@ void Init_lexer()
890
909
  init_symbol(embedded_document);
891
910
  init_symbol(empty_numeric);
892
911
  init_symbol(escape_eof);
912
+ init_symbol(heredoc_id_ends_with_nl);
913
+ init_symbol(heredoc_id_has_newline);
893
914
  init_symbol(incomplete_escape);
894
915
  init_symbol(invalid_escape);
895
916
  init_symbol(invalid_escape_use);
@@ -897,16 +918,19 @@ void Init_lexer()
897
918
  init_symbol(invalid_octal);
898
919
  init_symbol(invalid_unicode_escape);
899
920
  init_symbol(ivar_name);
900
- init_symbol(heredoc_id_ends_with_nl);
901
- init_symbol(heredoc_id_has_newline);
921
+ init_symbol(leading_zero_in_numparam);
902
922
  init_symbol(no_dot_digit_literal);
923
+ init_symbol(numparam_outside_block);
924
+ init_symbol(ordinary_param_defined);
903
925
  init_symbol(prefix);
904
926
  init_symbol(regexp_options);
905
927
  init_symbol(string_eof);
928
+ init_symbol(too_large_numparam);
906
929
  init_symbol(trailing_in_number);
907
930
  init_symbol(unexpected);
908
931
  init_symbol(unexpected_percent_str);
909
932
  init_symbol(unicode_point_too_large);
933
+ init_symbol(unterminated_heredoc_id);
910
934
  init_symbol(unterminated_unicode);
911
935
 
912
936
  VALUE m_Parser = rb_define_module("Parser");
@@ -958,12 +982,17 @@ void Init_lexer()
958
982
  rb_define_method(c_Lexer, "source_buffer=", lexer_set_source_buffer, 1);
959
983
  rb_define_method(c_Lexer, "force_utf32=", lexer_set_force_utf32, 1);
960
984
 
985
+ rb_define_method(c_Lexer, "max_numparam_stack", lexer_max_numparam_stack, 0);
986
+ rb_define_method(c_Lexer, "max_numparam", lexer_max_numparam, 0);
987
+
961
988
  rb_define_attr(c_Lexer, "context", 1, 1);
962
989
 
963
990
  VALUE m_Source = rb_const_get(m_Parser, rb_intern("Source"));
964
991
  comment_klass = rb_const_get(m_Source, rb_intern("Comment"));
965
992
  diagnostic_klass = rb_const_get(m_Parser, rb_intern("Diagnostic"));
966
993
  range_klass = rb_const_get(m_Source, rb_intern("Range"));
994
+ VALUE lexer_class = rb_const_get(m_Parser, rb_intern("Lexer"));
995
+ max_numparam_stack_klass = rb_const_get(lexer_class, rb_intern("MaxNumparamStack"));
967
996
 
968
997
  empty_array = rb_obj_freeze(rb_ary_new2(0));
969
998
  rb_gc_register_address(&empty_array);
@@ -973,6 +1002,8 @@ void Init_lexer()
973
1002
  rb_gc_register_address(&newline);
974
1003
  escaped_newline = rb_obj_freeze(rb_str_new2("\\\n"));
975
1004
  rb_gc_register_address(&escaped_newline);
1005
+ slash_r = rb_obj_freeze(rb_str_new2("\r"));
1006
+ rb_gc_register_address(&slash_r);
976
1007
 
977
1008
  if (rb_const_defined(rb_cObject, rb_intern("Encoding"))) {
978
1009
  VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
@@ -1170,6 +1201,21 @@ void Init_lexer()
1170
1201
  empty_array);
1171
1202
  }
1172
1203
 
1204
+ action read_post_meta_or_ctrl_char {
1205
+ VALUE codepoint = rb_funcall(lexer->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
1206
+ lexer->escape = rb_funcall(codepoint, rb_intern("chr"), 0);
1207
+ int codepoint_i = FIX2INT(rb_funcall(codepoint, rb_intern("ord"), 0));
1208
+
1209
+ if (
1210
+ lexer->version >= 27 && (
1211
+ (codepoint_i >= 0 && codepoint_i <= 8) ||
1212
+ (codepoint_i >= 14 && codepoint_i <= 31)
1213
+ )
1214
+ ) {
1215
+ diagnostic(lexer, fatal, invalid_escape, Qnil, range(lexer, ts, te), empty_array);
1216
+ }
1217
+ }
1218
+
1173
1219
  action slash_c_char {
1174
1220
  char c = *RSTRING_PTR(lexer->escape) & 0x9f;
1175
1221
  lexer->escape = rb_str_new(&c, 1);
@@ -1184,13 +1230,13 @@ void Init_lexer()
1184
1230
 
1185
1231
  maybe_escaped_char = (
1186
1232
  '\\' c_any %unescape_char
1187
- | ( c_any - [\\] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); }
1233
+ | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
1188
1234
  );
1189
1235
 
1190
1236
  maybe_escaped_ctrl_char = (
1191
1237
  '\\' c_any %unescape_char %slash_c_char
1192
1238
  | '?' % { lexer->escape = rb_str_new2("\x7f"); }
1193
- | ( c_any - [\\?] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); } %slash_c_char
1239
+ | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
1194
1240
  );
1195
1241
 
1196
1242
  escape = (
@@ -1636,6 +1682,47 @@ void Init_lexer()
1636
1682
  fnext *stack[--top]; fbreak;
1637
1683
  };
1638
1684
 
1685
+ '@' [0-9]+
1686
+ => {
1687
+ VALUE token = tok(lexer, ts, te);
1688
+
1689
+ if (lexer->version < 27) {
1690
+ VALUE hash = rb_hash_new();
1691
+ rb_hash_aset(hash, ID2SYM(rb_intern("name")), token);
1692
+ diagnostic(lexer, severity_error, ivar_name, hash, range(lexer, ts, te), empty_array);
1693
+ }
1694
+
1695
+ VALUE value = rb_funcall(token, rb_intern("[]"), 1, rb_range_new(INT2NUM(1), INT2NUM(-1), 0));
1696
+ VALUE int_value = rb_funcall(value, rb_intern("to_i"), 0);
1697
+
1698
+ if (*RSTRING_PTR(value) == '0') {
1699
+ diagnostic(lexer, severity_error, leading_zero_in_numparam, Qnil, range(lexer, ts, te), empty_array);
1700
+ }
1701
+
1702
+ if (FIX2INT(int_value) > NUMPARAM_MAX) {
1703
+ diagnostic(lexer, severity_error, too_large_numparam, Qnil, range(lexer, ts, te), empty_array);
1704
+ }
1705
+
1706
+ VALUE context = rb_iv_get(self, "@context");
1707
+ int in_block = RTEST(rb_funcall(context, rb_intern("in_block?"), 0));
1708
+ int in_lambda = RTEST(rb_funcall(context, rb_intern("in_lambda?"), 0));
1709
+
1710
+ if (!in_block && !in_lambda) {
1711
+ diagnostic(lexer, severity_error, numparam_outside_block, Qnil, range(lexer, ts, te), empty_array);
1712
+ }
1713
+
1714
+ VALUE max_numparam_stack = lexer->max_numparam_stack;
1715
+ int can_have_numparams = RTEST(rb_funcall(max_numparam_stack, rb_intern("can_have_numparams?"), 0));
1716
+ if (!can_have_numparams) {
1717
+ diagnostic(lexer, severity_error, ordinary_param_defined, Qnil, range(lexer, ts, te), empty_array);
1718
+ }
1719
+
1720
+ rb_funcall(max_numparam_stack, rb_intern("register"), 1, int_value);
1721
+
1722
+ emit_token(lexer, tNUMPARAM, tok(lexer, ts + 1, te), ts, te);
1723
+ fnext *stack[--top]; fbreak;
1724
+ };
1725
+
1639
1726
  instance_var_v => {
1640
1727
  VALUE str = tok(lexer, ts, te);
1641
1728
 
@@ -1960,7 +2047,15 @@ void Init_lexer()
1960
2047
 
1961
2048
  VALUE delimiter = tok(lexer, rng_s, rng_e);
1962
2049
 
1963
- if (lexer->version >= 24) {
2050
+ if (lexer->version >= 27) {
2051
+ int newlines_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline));
2052
+ int slash_r_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, slash_r));
2053
+
2054
+ if (newlines_count > 0 || slash_r_count > 0) {
2055
+ diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil,
2056
+ range(lexer, ts, ts + 1), empty_array);
2057
+ }
2058
+ } else if (lexer->version >= 24) {
1964
2059
  if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
1965
2060
  if (str_end_with_p(delimiter, "\n")) {
1966
2061
  diagnostic(lexer, warning, heredoc_id_ends_with_nl, Qnil,
@@ -1985,6 +2080,21 @@ void Init_lexer()
1985
2080
  }
1986
2081
  };
1987
2082
 
2083
+ # Escaped unterminated heredoc start
2084
+ # <<'END | <<"END | <<`END |
2085
+ # <<-'END | <<-"END | <<-`END |
2086
+ # <<~'END | <<~"END | <<~`END
2087
+ #
2088
+ # If the heredoc is terminated the rule above should handle it
2089
+ '<<' [~\-]?
2090
+ ('"' (any - c_nl - '"')*
2091
+ |"'" (any - c_nl - "'")*
2092
+ |"`" (any - c_nl - "`")
2093
+ )
2094
+ => {
2095
+ diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil, range(lexer, ts, ts + 1), empty_array);
2096
+ };
2097
+
1988
2098
  ':' ('&&' | '||') => {
1989
2099
  fhold; fhold;
1990
2100
  emit_token(lexer, tSYMBEG, tok(lexer, ts, ts + 1), ts, ts + 1);
@@ -2015,6 +2125,23 @@ void Init_lexer()
2015
2125
  fnext expr_end; fbreak;
2016
2126
  };
2017
2127
 
2128
+ ':' ( '@' %{ tm = p - 1; diag_msg = ivar_name; }
2129
+ | '@@' %{ tm = p - 2; diag_msg = cvar_name; }
2130
+ ) [0-9]*
2131
+ => {
2132
+ if (lexer->version >= 27) {
2133
+ VALUE hash = rb_hash_new();
2134
+ rb_hash_aset(hash, ID2SYM(rb_intern("name")), tok(lexer, tm, te));
2135
+ diagnostic(lexer, severity_error, diag_msg, hash, range(lexer, tm, te), empty_array);
2136
+ } else {
2137
+ emit_token(lexer, tCOLON, tok(lexer, ts, ts + 1), ts, ts + 1);
2138
+ p = ts;
2139
+ }
2140
+
2141
+ fnext expr_end; fbreak;
2142
+ };
2143
+
2144
+
2018
2145
  '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
2019
2146
  | (c_any - c_space_nl - e_bs) % { lexer->escape = Qnil; }
2020
2147
  ) => {
data/lib/c_lexer.rb CHANGED
@@ -74,6 +74,7 @@ module Parser
74
74
  @lexer = ::Parser::CLexer.new(version)
75
75
  @lexer.diagnostics = @diagnostics
76
76
  @lexer.static_env = @static_env
77
+ @lexer.context = @context
77
78
  end
78
79
  end
79
80
 
@@ -83,6 +84,7 @@ module Parser
83
84
  @lexer = ::Parser::CLexer.new(version)
84
85
  @lexer.diagnostics = @diagnostics
85
86
  @lexer.static_env = @static_env
87
+ @lexer.context = @context
86
88
  end
87
89
  end
88
90
 
@@ -92,6 +94,7 @@ module Parser
92
94
  @lexer = ::Parser::CLexer.new(version)
93
95
  @lexer.diagnostics = @diagnostics
94
96
  @lexer.static_env = @static_env
97
+ @lexer.context = @context
95
98
  end
96
99
  end
97
100
  end
@@ -1,3 +1,3 @@
1
1
  module CLexer
2
- VERSION = '2.6.3.0.0'
2
+ VERSION = '2.6.4.0.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: c_lexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.3.0.0
4
+ version: 2.6.4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Bylich
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 2.6.3.0
33
+ version: 2.6.4.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 2.6.3.0
40
+ version: 2.6.4.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement