c_lexer 2.6.3.0.0 → 2.6.4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/lexer/lexer.h CHANGED
@@ -50,6 +50,8 @@ struct Lexer {
50
50
  VALUE escape;
51
51
 
52
52
  int cs_before_block_comment;
53
+
54
+ VALUE max_numparam_stack;
53
55
  };
54
56
 
55
57
  static void lexer_mark(void*);
@@ -229,6 +231,7 @@ VALUE tNEQ;
229
231
  VALUE tNL;
230
232
  VALUE tNMATCH;
231
233
  VALUE tNTH_REF;
234
+ VALUE tNUMPARAM;
232
235
  VALUE tOP_ASGN;
233
236
  VALUE tOROP;
234
237
  VALUE tPERCENT;
@@ -268,6 +271,7 @@ VALUE tXSTRING_BEG;
268
271
  VALUE comment_klass;
269
272
  VALUE diagnostic_klass;
270
273
  VALUE range_klass;
274
+ VALUE max_numparam_stack_klass;
271
275
 
272
276
  VALUE severity_error;
273
277
  VALUE fatal;
@@ -281,6 +285,8 @@ VALUE cvar_name;
281
285
  VALUE embedded_document;
282
286
  VALUE empty_numeric;
283
287
  VALUE escape_eof;
288
+ VALUE heredoc_id_ends_with_nl;
289
+ VALUE heredoc_id_has_newline;
284
290
  VALUE incomplete_escape;
285
291
  VALUE invalid_escape;
286
292
  VALUE invalid_escape_use;
@@ -288,22 +294,26 @@ VALUE invalid_hex_escape;
288
294
  VALUE invalid_octal;
289
295
  VALUE invalid_unicode_escape;
290
296
  VALUE ivar_name;
291
- VALUE heredoc_id_ends_with_nl;
292
- VALUE heredoc_id_has_newline;
297
+ VALUE leading_zero_in_numparam;
293
298
  VALUE no_dot_digit_literal;
299
+ VALUE numparam_outside_block;
300
+ VALUE ordinary_param_defined;
294
301
  VALUE prefix;
295
302
  VALUE regexp_options;
296
303
  VALUE string_eof;
304
+ VALUE too_large_numparam;
297
305
  VALUE trailing_in_number;
298
306
  VALUE unexpected;
299
307
  VALUE unexpected_percent_str;
300
308
  VALUE unicode_point_too_large;
309
+ VALUE unterminated_heredoc_id;
301
310
  VALUE unterminated_unicode;
302
311
 
303
312
  VALUE empty_array;
304
313
  VALUE blank_string;
305
314
  VALUE newline;
306
315
  VALUE escaped_newline;
316
+ VALUE slash_r;
307
317
  VALUE utf8_encoding;
308
318
  VALUE cr_then_anything_to_eol;
309
319
  VALUE crs_to_eol;
data/ext/lexer/lexer.rl CHANGED
@@ -12,6 +12,7 @@
12
12
  #define GET_LEXER(self) Data_Get_Struct(self, Lexer, lexer)
13
13
  #define STATIC_ENV_DECLARED(name) \
14
14
  lexer->static_env != Qnil && RTEST(rb_funcall(lexer->static_env, rb_intern("declared?"), 1, name))
15
+ #define NUMPARAM_MAX 100
15
16
 
16
17
  #include "stack_state/cmdarg.h"
17
18
  #include "stack_state/cond.h"
@@ -44,6 +45,7 @@ static VALUE lexer_alloc(VALUE klass)
44
45
  lexer->comments = Qnil;
45
46
  lexer->encoding = Qnil;
46
47
  lexer->escape = Qnil;
48
+ lexer->max_numparam_stack = Qnil;
47
49
 
48
50
  ss_stack_init(&lexer->cond_stack);
49
51
  ss_stack_init(&lexer->cmdarg_stack);
@@ -66,6 +68,7 @@ static void lexer_mark(void *ptr)
66
68
  rb_gc_mark(lexer->comments);
67
69
  rb_gc_mark(lexer->encoding);
68
70
  rb_gc_mark(lexer->escape);
71
+ rb_gc_mark(lexer->max_numparam_stack);
69
72
 
70
73
  for (literal *lit = lexer->literal_stack.bottom; lit < lexer->literal_stack.top; lit++) {
71
74
  rb_gc_mark(lit->buffer);
@@ -157,6 +160,8 @@ static VALUE lexer_reset(int argc, VALUE *argv, VALUE self)
157
160
 
158
161
  lexer->cs_before_block_comment = lex_en_line_begin;
159
162
 
163
+ lexer->max_numparam_stack = rb_class_new_instance(0, NULL, max_numparam_stack_klass) ;
164
+
160
165
  return self;
161
166
  }
162
167
 
@@ -306,6 +311,18 @@ static VALUE lexer_set_in_kwarg(VALUE self, VALUE val)
306
311
  return val;
307
312
  }
308
313
 
314
+ static VALUE lexer_max_numparam_stack(VALUE self)
315
+ {
316
+ Lexer* lexer = GET_LEXER(self);
317
+ return lexer->max_numparam_stack;
318
+ }
319
+
320
+ static VALUE lexer_max_numparam(VALUE self)
321
+ {
322
+ Lexer* lexer = GET_LEXER(self);
323
+ return rb_funcall(lexer->max_numparam_stack, rb_intern("top"), 0);
324
+ }
325
+
309
326
  static VALUE lexer_get_dedent_level(VALUE self)
310
327
  {
311
328
  Lexer* lexer = GET_LEXER(self);
@@ -335,6 +352,7 @@ static VALUE lexer_advance(VALUE self)
335
352
  long ident_ts = 0, ident_te = 0;
336
353
  long numeric_s = 0;
337
354
  Data_Get_Struct(self, Lexer, lexer);
355
+ VALUE diag_msg;
338
356
 
339
357
  if (RARRAY_LEN(lexer->token_queue) > 0)
340
358
  return rb_ary_shift(lexer->token_queue);
@@ -788,9 +806,9 @@ void Init_lexer()
788
806
  init_symbol(tASSOC);
789
807
  init_symbol(tBACK_REF);
790
808
  init_symbol(tBACK_REF2);
809
+ init_symbol(tBANG);
791
810
  init_symbol(tBDOT2);
792
811
  init_symbol(tBDOT3);
793
- init_symbol(tBANG);
794
812
  init_symbol(tCARET);
795
813
  init_symbol(tCHARACTER);
796
814
  init_symbol(tCMP);
@@ -841,6 +859,7 @@ void Init_lexer()
841
859
  init_symbol(tNL);
842
860
  init_symbol(tNMATCH);
843
861
  init_symbol(tNTH_REF);
862
+ init_symbol(tNUMPARAM);
844
863
  init_symbol(tOP_ASGN);
845
864
  init_symbol(tOROP);
846
865
  init_symbol(tPERCENT);
@@ -890,6 +909,8 @@ void Init_lexer()
890
909
  init_symbol(embedded_document);
891
910
  init_symbol(empty_numeric);
892
911
  init_symbol(escape_eof);
912
+ init_symbol(heredoc_id_ends_with_nl);
913
+ init_symbol(heredoc_id_has_newline);
893
914
  init_symbol(incomplete_escape);
894
915
  init_symbol(invalid_escape);
895
916
  init_symbol(invalid_escape_use);
@@ -897,16 +918,19 @@ void Init_lexer()
897
918
  init_symbol(invalid_octal);
898
919
  init_symbol(invalid_unicode_escape);
899
920
  init_symbol(ivar_name);
900
- init_symbol(heredoc_id_ends_with_nl);
901
- init_symbol(heredoc_id_has_newline);
921
+ init_symbol(leading_zero_in_numparam);
902
922
  init_symbol(no_dot_digit_literal);
923
+ init_symbol(numparam_outside_block);
924
+ init_symbol(ordinary_param_defined);
903
925
  init_symbol(prefix);
904
926
  init_symbol(regexp_options);
905
927
  init_symbol(string_eof);
928
+ init_symbol(too_large_numparam);
906
929
  init_symbol(trailing_in_number);
907
930
  init_symbol(unexpected);
908
931
  init_symbol(unexpected_percent_str);
909
932
  init_symbol(unicode_point_too_large);
933
+ init_symbol(unterminated_heredoc_id);
910
934
  init_symbol(unterminated_unicode);
911
935
 
912
936
  VALUE m_Parser = rb_define_module("Parser");
@@ -958,12 +982,17 @@ void Init_lexer()
958
982
  rb_define_method(c_Lexer, "source_buffer=", lexer_set_source_buffer, 1);
959
983
  rb_define_method(c_Lexer, "force_utf32=", lexer_set_force_utf32, 1);
960
984
 
985
+ rb_define_method(c_Lexer, "max_numparam_stack", lexer_max_numparam_stack, 0);
986
+ rb_define_method(c_Lexer, "max_numparam", lexer_max_numparam, 0);
987
+
961
988
  rb_define_attr(c_Lexer, "context", 1, 1);
962
989
 
963
990
  VALUE m_Source = rb_const_get(m_Parser, rb_intern("Source"));
964
991
  comment_klass = rb_const_get(m_Source, rb_intern("Comment"));
965
992
  diagnostic_klass = rb_const_get(m_Parser, rb_intern("Diagnostic"));
966
993
  range_klass = rb_const_get(m_Source, rb_intern("Range"));
994
+ VALUE lexer_class = rb_const_get(m_Parser, rb_intern("Lexer"));
995
+ max_numparam_stack_klass = rb_const_get(lexer_class, rb_intern("MaxNumparamStack"));
967
996
 
968
997
  empty_array = rb_obj_freeze(rb_ary_new2(0));
969
998
  rb_gc_register_address(&empty_array);
@@ -973,6 +1002,8 @@ void Init_lexer()
973
1002
  rb_gc_register_address(&newline);
974
1003
  escaped_newline = rb_obj_freeze(rb_str_new2("\\\n"));
975
1004
  rb_gc_register_address(&escaped_newline);
1005
+ slash_r = rb_obj_freeze(rb_str_new2("\r"));
1006
+ rb_gc_register_address(&slash_r);
976
1007
 
977
1008
  if (rb_const_defined(rb_cObject, rb_intern("Encoding"))) {
978
1009
  VALUE encoding = rb_const_get(rb_cObject, rb_intern("Encoding"));
@@ -1170,6 +1201,21 @@ void Init_lexer()
1170
1201
  empty_array);
1171
1202
  }
1172
1203
 
1204
action read_post_meta_or_ctrl_char {
  /*
   * Take the source slice at p - 1, store its first character as the
   * current escape value, and reject low control codes on newer versions.
   * NOTE(review): judging by the action name this runs on the character
   * that follows a meta/control escape prefix - confirm against the
   * machines that reference it.
   */
  VALUE prev_slice = rb_funcall(lexer->source_buffer, rb_intern("slice"), 1, INT2NUM(p - 1));
  lexer->escape = rb_funcall(prev_slice, rb_intern("chr"), 0);
  int prev_ord = FIX2INT(rb_funcall(prev_slice, rb_intern("ord"), 0));

  /* Codes 0..8 and 14..31; 9..13 are deliberately left out of both ranges. */
  int in_low_range  = (prev_ord >= 0 && prev_ord <= 8);
  int in_high_range = (prev_ord >= 14 && prev_ord <= 31);

  /* Only lexer versions 27 and above treat these codes as a fatal invalid escape. */
  if (lexer->version >= 27 && (in_low_range || in_high_range)) {
    diagnostic(lexer, fatal, invalid_escape, Qnil, range(lexer, ts, te), empty_array);
  }
}
1218
+
1173
1219
  action slash_c_char {
1174
1220
  char c = *RSTRING_PTR(lexer->escape) & 0x9f;
1175
1221
  lexer->escape = rb_str_new(&c, 1);
@@ -1184,13 +1230,13 @@ void Init_lexer()
1184
1230
 
1185
1231
  maybe_escaped_char = (
1186
1232
  '\\' c_any %unescape_char
1187
- | ( c_any - [\\] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); }
1233
+ | ( c_any - [\\] ) %read_post_meta_or_ctrl_char
1188
1234
  );
1189
1235
 
1190
1236
  maybe_escaped_ctrl_char = (
1191
1237
  '\\' c_any %unescape_char %slash_c_char
1192
1238
  | '?' % { lexer->escape = rb_str_new2("\x7f"); }
1193
- | ( c_any - [\\?] ) % { lexer->escape = rb_str_substr(lexer->source, p - 1, 1); } %slash_c_char
1239
+ | ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
1194
1240
  );
1195
1241
 
1196
1242
  escape = (
@@ -1636,6 +1682,47 @@ void Init_lexer()
1636
1682
  fnext *stack[--top]; fbreak;
1637
1683
  };
1638
1684
 
1685
# Numbered parameter token: '@' followed by one or more decimal digits.
# Reports every applicable diagnostic, registers the number on the
# max_numparam stack and emits tNUMPARAM carrying the digits only.
'@' [0-9]+
=> {
  VALUE token = tok(lexer, ts, te);

  /* Lexer versions below 27 report this spelling as an invalid ivar name. */
  if (lexer->version < 27) {
    VALUE hash = rb_hash_new();
    rb_hash_aset(hash, ID2SYM(rb_intern("name")), token);
    diagnostic(lexer, severity_error, ivar_name, hash, range(lexer, ts, te), empty_array);
  }

  /* token[1..-1]: everything after the leading '@', i.e. the digits. */
  VALUE value = rb_funcall(token, rb_intern("[]"), 1, rb_range_new(INT2NUM(1), INT2NUM(-1), 0));
  VALUE int_value = rb_funcall(value, rb_intern("to_i"), 0);

  /* The pattern guarantees at least one digit, so the first byte is readable. */
  if (*RSTRING_PTR(value) == '0') {
    diagnostic(lexer, severity_error, leading_zero_in_numparam, Qnil, range(lexer, ts, te), empty_array);
  }

  /*
   * Compare through Integer#> instead of unwrapping with FIX2INT: a long
   * digit run makes String#to_i return a Bignum, which is not a Fixnum, so
   * FIX2INT would raise or yield a meaningless value rather than letting
   * the too_large_numparam diagnostic be reported.
   */
  if (RTEST(rb_funcall(int_value, rb_intern(">"), 1, INT2NUM(NUMPARAM_MAX)))) {
    diagnostic(lexer, severity_error, too_large_numparam, Qnil, range(lexer, ts, te), empty_array);
  }

  /* Numbered parameters are only accepted inside a block or a lambda. */
  VALUE context = rb_iv_get(self, "@context");
  int in_block = RTEST(rb_funcall(context, rb_intern("in_block?"), 0));
  int in_lambda = RTEST(rb_funcall(context, rb_intern("in_lambda?"), 0));

  if (!in_block && !in_lambda) {
    diagnostic(lexer, severity_error, numparam_outside_block, Qnil, range(lexer, ts, te), empty_array);
  }

  /* The stack object decides whether numbered parameters are still allowed here. */
  VALUE max_numparam_stack = lexer->max_numparam_stack;
  int can_have_numparams = RTEST(rb_funcall(max_numparam_stack, rb_intern("can_have_numparams?"), 0));
  if (!can_have_numparams) {
    diagnostic(lexer, severity_error, ordinary_param_defined, Qnil, range(lexer, ts, te), empty_array);
  }

  rb_funcall(max_numparam_stack, rb_intern("register"), 1, int_value);

  /* Token value excludes the '@'; the token range still covers it. */
  emit_token(lexer, tNUMPARAM, tok(lexer, ts + 1, te), ts, te);
  fnext *stack[--top]; fbreak;
};
1725
+
1639
1726
  instance_var_v => {
1640
1727
  VALUE str = tok(lexer, ts, te);
1641
1728
 
@@ -1960,7 +2047,15 @@ void Init_lexer()
1960
2047
 
1961
2048
  VALUE delimiter = tok(lexer, rng_s, rng_e);
1962
2049
 
1963
- if (lexer->version >= 24) {
2050
+ if (lexer->version >= 27) {
2051
+ int newlines_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline));
2052
+ int slash_r_count = NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, slash_r));
2053
+
2054
+ if (newlines_count > 0 || slash_r_count > 0) {
2055
+ diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil,
2056
+ range(lexer, ts, ts + 1), empty_array);
2057
+ }
2058
+ } else if (lexer->version >= 24) {
1964
2059
  if (NUM2INT(rb_funcall(delimiter, rb_intern("count"), 1, newline)) > 0) {
1965
2060
  if (str_end_with_p(delimiter, "\n")) {
1966
2061
  diagnostic(lexer, warning, heredoc_id_ends_with_nl, Qnil,
@@ -1985,6 +2080,21 @@ void Init_lexer()
1985
2080
  }
1986
2081
  };
1987
2082
 
2083
# Heredoc opener whose quoted identifier is not closed before end of line.
# Covered spellings (quote character may be ", ' or `):
#   <<'END    <<"END    <<`END
#   <<-'END   <<-"END   <<-`END
#   <<~'END   <<~"END   <<~`END
#
# When the identifier is terminated, the rule above should handle it.
# NOTE(review): the backtick alternative carries no `*` repetition, unlike
# the two quote alternatives - confirm this is intentional before changing.
'<<' [~\-]?
  ( '"' (any - c_nl - '"')*
  | "'" (any - c_nl - "'")*
  | "`" (any - c_nl - "`")
  )
=> {
  /* The diagnostic range is ts..ts + 1, not the whole match. */
  diagnostic(lexer, severity_error, unterminated_heredoc_id, Qnil, range(lexer, ts, ts + 1), empty_array);
};
2097
+
1988
2098
  ':' ('&&' | '||') => {
1989
2099
  fhold; fhold;
1990
2100
  emit_token(lexer, tSYMBEG, tok(lexer, ts, ts + 1), ts, ts + 1);
@@ -2015,6 +2125,23 @@ void Init_lexer()
2015
2125
  fnext expr_end; fbreak;
2016
2126
  };
2017
2127
 
2128
# ':' followed by '@' or '@@' and then zero or more digits.
# The leaving actions record where the '@' / '@@' starts (tm) and which
# diagnostic message applies (ivar_name or cvar_name).
':' ( '@'  %{ tm = p - 1; diag_msg = ivar_name; }
    | '@@' %{ tm = p - 2; diag_msg = cvar_name; }
    ) [0-9]*
=> {
  if (lexer->version < 27) {
    /* Older versions: emit just the colon and move p back to ts. */
    emit_token(lexer, tCOLON, tok(lexer, ts, ts + 1), ts, ts + 1);
    p = ts;
  } else {
    /* Versions 27 and above: report the name spanning tm..te as an error. */
    VALUE hash = rb_hash_new();
    rb_hash_aset(hash, ID2SYM(rb_intern("name")), tok(lexer, tm, te));
    diagnostic(lexer, severity_error, diag_msg, hash, range(lexer, tm, te), empty_array);
  }

  fnext expr_end; fbreak;
};
2143
+
2144
+
2018
2145
  '?' ( e_bs ( escape - ( '\u{' (xdigit+ [ \t]+)+ xdigit+ '}' ))
2019
2146
  | (c_any - c_space_nl - e_bs) % { lexer->escape = Qnil; }
2020
2147
  ) => {
data/lib/c_lexer.rb CHANGED
@@ -74,6 +74,7 @@ module Parser
74
74
  @lexer = ::Parser::CLexer.new(version)
75
75
  @lexer.diagnostics = @diagnostics
76
76
  @lexer.static_env = @static_env
77
+ @lexer.context = @context
77
78
  end
78
79
  end
79
80
 
@@ -83,6 +84,7 @@ module Parser
83
84
  @lexer = ::Parser::CLexer.new(version)
84
85
  @lexer.diagnostics = @diagnostics
85
86
  @lexer.static_env = @static_env
87
+ @lexer.context = @context
86
88
  end
87
89
  end
88
90
 
@@ -92,6 +94,7 @@ module Parser
92
94
  @lexer = ::Parser::CLexer.new(version)
93
95
  @lexer.diagnostics = @diagnostics
94
96
  @lexer.static_env = @static_env
97
+ @lexer.context = @context
95
98
  end
96
99
  end
97
100
  end
@@ -1,3 +1,3 @@
1
1
  module CLexer
2
- VERSION = '2.6.3.0.0'
2
+ VERSION = '2.6.4.0.0'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: c_lexer
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.3.0.0
4
+ version: 2.6.4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ilya Bylich
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - '='
32
32
  - !ruby/object:Gem::Version
33
- version: 2.6.3.0
33
+ version: 2.6.4.0
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - '='
39
39
  - !ruby/object:Gem::Version
40
- version: 2.6.3.0
40
+ version: 2.6.4.0
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement