natalie_parser 2.1.0 → 2.2.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: '0598d42a8e0b83aa5a340d1615474a4c5a8e7b209032bf574a230778e4d3ce5e'
-   data.tar.gz: 475cce9e2f078ab440674b48e3b99ba48f163f34b889b376cf681f74a489ce53
+   metadata.gz: 31bee2746c1e7a36eca16194e27467264dfbad5131075090e0b94c675ff150b1
+   data.tar.gz: 7f9a2a77aa2e34c56faeb740900779ff4682471cb02dbd51403eb1b7f72b13a7
  SHA512:
-   metadata.gz: 59cd20f85484845036f934ca5b40c020ad35655824950c036a697bf0bc437d3ebd35eb25bacc7c6f062d096c01b58dea5a1ebd688d0d101192ee7f43b9881b25
-   data.tar.gz: 53bcc9b34ab02943dda1602141ae949e4fa1dcf1c229dfeda8ef710b01d1d9d9c97137ce7e5b08e88b121025009c9a9fd71edbc77eead37f69f37b794fab02f8
+   metadata.gz: 74335871743111340267d41631f3eeae0281ea5f93bb2bb4a6536682a55f325162d428d986ba0182e14adfb27e830f9622048fb443f1561ee9611c988b07bd6b
+   data.tar.gz: 2fd301c60b32db6c074946adceaa54fca3736f6e64335d5bb68c9ad01f092bf19f1ce2810fb75e4641e4b6cf7ebf7e77165538282107f214fa116254b0d37ac0
data/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
  # Changelog
 
+ ## 2.2.0 (2022-10-24)
+
+ - FIX: Allow backreference as first arg of implicit call
+ - FIX: Exclude `?` and `!` from class/instance/global variable name
+ - FIX: Fix parsing of hashes containing assignments
+ - FIX: Parse aliases of unary operators and methods named as keywords
+ - FIX: Parse alias of keyword-like method to assignment method
+ - FIX: Parse alias/undef of assignment methods
+ - FIX: Parse backtick as method name
+ - FIX: Parse class/module definition in dynamic class/module
+ - FIX: Parse `def !`
+ - FIX: Parse `def foo::bar`
+ - FIX: Parse `foo(<<FOO)`
+ - FIX: Parse `for ... do` and `while ... do`
+ - FIX: Parse global variables beginning with `$_`
+ - FIX: Parse keywords/operators as method names for `::`
+ - FIX: Parse `__LINE__` constant
+ - FIX: Parse more percent strings
+ - FIX: Parse more regexes
+ - FIX: Parse more stabby procs without parentheses
+ - FIX: Parse multiple parameters set to the same default value
+ - FIX: Parse parentheses-less stabby proc with keyword arg
+ - FIX: Parse undef of methods with special names
+ - FIX: Parse `unless ... then`
+ - FIX: Parse variable names starting with extended ASCII characters
+ - FIX: Raise SyntaxError for `def ====`
+ - FIX: Raise SyntaxError for `def +@.foo`
+ - FIX: Tokenize unterminated ternary at EOF
+ - FIX: Use lower precedence for block pass
+
  ## 2.1.0 (2022-08-12)
 
  - FEAT: Parse for loops
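The 2.2.0 entries above are almost all about accepting Ruby syntax that the 2.1.0 parser rejected. A few hand-written snippets of the constructs named in the changelog (illustrative only, not taken from the gem's test suite):

```ruby
for i in [1, 2, 3] do   # `for ... do` (and likewise `while ... do`)
  puts i
end

unless i > 3 then puts "small" end   # `unless ... then`

"abc" =~ /b/
puts $&                  # backreference as the first arg of an implicit call

h = { answer: x = 42 }   # hash literal containing an assignment

puts __LINE__            # __LINE__ parsed as an integer literal
```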
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
      case NatalieParser::Token::Type::Constant:
      case NatalieParser::Token::Type::GlobalVariable:
      case NatalieParser::Token::Type::InstanceVariable:
+     case NatalieParser::Token::Type::OperatorName:
      case NatalieParser::Token::Type::Symbol:
      case NatalieParser::Token::Type::SymbolKey: {
          auto literal = token.literal_string();
@@ -70,9 +70,9 @@ protected:
70
70
  virtual bool skip_whitespace();
71
71
  virtual Token build_next_token();
72
72
  Token consume_symbol();
73
+ SharedPtr<String> consume_word();
73
74
  Token consume_word(Token::Type type);
74
- Token consume_bare_name();
75
- Token consume_constant();
75
+ Token consume_bare_name_or_constant(Token::Type type);
76
76
  Token consume_global_variable();
77
77
  Token consume_heredoc();
78
78
  Token consume_numeric();
@@ -80,11 +80,19 @@ protected:
80
80
  Token consume_nth_ref();
81
81
  long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
82
  long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
- Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
84
84
  Token consume_single_quoted_string(char, char);
85
85
  Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
86
  Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
87
  Token consume_regexp(char start_char, char stop_char);
88
+ Token consume_percent_symbol(char start_char, char stop_char);
89
+ Token consume_interpolated_string(char start_char, char stop_char);
90
+ Token consume_interpolated_shell(char start_char, char stop_char);
91
+ Token consume_percent_lower_w(char start_char, char stop_char);
92
+ Token consume_percent_upper_w(char start_char, char stop_char);
93
+ Token consume_percent_lower_i(char start_char, char stop_char);
94
+ Token consume_percent_upper_i(char start_char, char stop_char);
95
+ Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
88
96
  SharedPtr<String> consume_non_whitespace();
89
97
 
90
98
  void utf32_codepoint_to_utf8(String &buf, long long codepoint);
@@ -95,7 +103,7 @@ protected:
95
103
  bool token_is_first_on_line() const;
96
104
 
97
105
  bool char_can_be_string_or_regexp_delimiter(char c) const {
98
- return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
106
+ return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
107
  }
100
108
 
101
109
  SharedPtr<String> m_input;
@@ -131,5 +139,10 @@ protected:
131
139
  // then increment m_pair_depth
132
140
  char m_start_char { 0 };
133
141
  int m_pair_depth { 0 };
142
+
143
+ size_t m_remaining_method_names { 0 };
144
+ bool m_allow_assignment_method { false };
145
+ Token::Type m_method_name_separator { Token::Type::Invalid };
146
+ Token m_last_method_name {};
134
147
  };
135
148
  }
@@ -49,16 +49,22 @@ public:
49
49
 
50
50
  enum class Precedence;
51
51
 
52
+ enum class IterAllow {
53
+ NONE,
54
+ CURLY_ONLY,
55
+ CURLY_AND_BLOCK,
56
+ };
57
+
52
58
  SharedPtr<Node> tree();
53
59
 
54
60
  private:
55
- bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
61
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
56
62
 
57
63
  Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
64
 
59
65
  bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
66
 
61
- SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
67
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
62
68
 
63
69
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
70
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
@@ -67,8 +73,9 @@ private:
67
73
  SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
74
  SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
75
 
76
+ void reinsert_collapsed_newline();
70
77
  SharedPtr<Node> parse_alias(LocalsHashmap &);
71
- SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
78
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
72
79
  SharedPtr<Node> parse_array(LocalsHashmap &);
73
80
  SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
81
  SharedPtr<Node> parse_begin_block(LocalsHashmap &);
@@ -96,7 +103,8 @@ private:
96
103
  Method,
97
104
  Proc,
98
105
  };
99
- void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
106
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
107
+ SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
100
108
 
101
109
  SharedPtr<Node> parse_encoding(LocalsHashmap &);
102
110
  SharedPtr<Node> parse_end_block(LocalsHashmap &);
@@ -115,6 +123,7 @@ private:
115
123
  SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
116
124
  SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
117
125
  SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
126
+ SharedPtr<Node> parse_line_constant(LocalsHashmap &);
118
127
  SharedPtr<Node> parse_lit(LocalsHashmap &);
119
128
  SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
120
129
  SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
@@ -124,7 +133,7 @@ private:
124
133
  SharedPtr<Node> parse_nil(LocalsHashmap &);
125
134
  SharedPtr<Node> parse_not(LocalsHashmap &);
126
135
  SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
127
- void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
136
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
128
137
  SharedPtr<Node> parse_redo(LocalsHashmap &);
129
138
  SharedPtr<Node> parse_retry(LocalsHashmap &);
130
139
  SharedPtr<Node> parse_return(LocalsHashmap &);
@@ -103,6 +103,7 @@ public:
103
103
  NotKeyword,
104
104
  NotMatch,
105
105
  NthRef,
106
+ OperatorName,
106
107
  OrKeyword,
107
108
  Percent,
108
109
  PercentEqual,
@@ -431,6 +432,8 @@ public:
431
432
  return "!";
432
433
  case Type::NthRef:
433
434
  return "nth_ref";
435
+ case Type::OperatorName:
436
+ return "operator";
434
437
  case Type::OrKeyword:
435
438
  return "or";
436
439
  case Type::Percent:
@@ -614,6 +617,7 @@ public:
614
617
  case Token::Type::LessThanOrEqual:
615
618
  case Token::Type::Match:
616
619
  case Token::Type::Minus:
620
+ case Token::Type::Not:
617
621
  case Token::Type::NotEqual:
618
622
  case Token::Type::NotMatch:
619
623
  case Token::Type::Percent:
@@ -635,6 +639,7 @@ public:
635
639
  bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
636
640
  bool is_comma() const { return m_type == Type::Comma; }
637
641
  bool is_comment() const { return m_type == Type::Comment; }
642
+ bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
638
643
  bool is_def_keyword() const { return m_type == Type::DefKeyword; }
639
644
  bool is_doc() const { return m_type == Type::Doc; }
640
645
  bool is_dot() const { return m_type == Type::Dot; }
@@ -743,9 +748,23 @@ public:
743
748
  }
744
749
  }
745
750
 
751
+ bool can_be_first_arg_of_def() const {
752
+ switch (m_type) {
753
+ case Token::Type::Ampersand:
754
+ case Token::Type::BareName:
755
+ case Token::Type::Star:
756
+ case Token::Type::StarStar:
757
+ case Token::Type::SymbolKey:
758
+ return true;
759
+ default:
760
+ return false;
761
+ }
762
+ }
763
+
746
764
  bool can_be_first_arg_of_implicit_call() const {
747
765
  switch (m_type) {
748
766
  case Token::Type::Arrow:
767
+ case Token::Type::BackRef:
749
768
  case Token::Type::BareName:
750
769
  case Token::Type::BeginKeyword:
751
770
  case Token::Type::Bignum:
@@ -800,6 +819,7 @@ public:
800
819
  switch (m_type) {
801
820
  case Token::Type::Equal:
802
821
  case Token::Type::LBracket:
822
+ case Token::Type::LParen:
803
823
  return true;
804
824
  default:
805
825
  if (is_operator())
@@ -826,6 +846,7 @@ public:
826
846
 
827
847
  bool can_precede_symbol_key() const {
828
848
  switch (m_type) {
849
+ case Type::Arrow:
829
850
  case Type::BareName:
830
851
  case Type::Comma:
831
852
  case Type::Constant:
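Adding `Type::Arrow` to `can_precede_symbol_key` is what allows a keyword argument in a parenthesis-less stabby proc, one of the 2.2.0 fixes above. An illustrative example (not from the gem's tests):

```ruby
scale = ->x, offset: 0 { x + offset }  # keyword arg without parentheses
scale.call(2, offset: 3)               # => 5
```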
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  class NatalieParser
-   VERSION = '2.1.0'
+   VERSION = '2.2.0'
  end
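The only change on the Ruby side of the gem is the version constant. A quick sanity check after upgrading might look like this; note that the `NatalieParser.parse` call is an assumption about the gem's Ruby entry point and is not part of this diff:

```ruby
require "natalie_parser"

puts NatalieParser::VERSION   # => "2.2.0"
# Assumed entry point (not shown in this diff): parse one of the newly
# supported constructs and inspect the resulting s-expression.
p NatalieParser.parse("for i in [1, 2] do end")
```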
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
22
22
  Token InterpolatedStringLexer::consume_string() {
23
23
  SharedPtr<String> buf = new String;
24
24
  while (auto c = current_char()) {
25
- if (c == '\\') {
25
+ if (c == '\\' && m_stop_char != '\\') {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
38
38
  Token RegexpLexer::consume_regexp() {
39
39
  SharedPtr<String> buf = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  switch (c) {
44
44
  case '/':
@@ -38,7 +38,7 @@ Token WordArrayLexer::build_next_token() {
38
38
  Token WordArrayLexer::consume_array() {
39
39
  m_buffer = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  advance();
44
44
  if (c == ' ') {
data/src/lexer.cpp CHANGED
@@ -80,7 +80,47 @@ Token Lexer::next_token() {
      m_whitespace_precedes = skip_whitespace();
      m_token_line = m_cursor_line;
      m_token_column = m_cursor_column;
-     return build_next_token();
+     Token token = build_next_token();
+     switch (token.type()) {
+     case Token::Type::AliasKeyword:
+         m_remaining_method_names = 2;
+         break;
+     case Token::Type::ConstantResolution:
+     case Token::Type::DefKeyword:
+         m_remaining_method_names = 1;
+         m_allow_assignment_method = true;
+         break;
+     case Token::Type::Dot:
+         m_remaining_method_names = 1;
+         break;
+     case Token::Type::UndefKeyword:
+         m_remaining_method_names = std::numeric_limits<size_t>::max();
+         m_method_name_separator = Token::Type::Comma;
+         break;
+     default:
+         if (m_method_name_separator != Token::Type::Invalid) {
+             if (m_last_method_name) {
+                 m_last_method_name = {};
+                 if (token.type() != m_method_name_separator) {
+                     m_remaining_method_names = 0;
+                     m_method_name_separator = Token::Type::Invalid;
+                 }
+             } else {
+                 m_last_method_name = token;
+             }
+         } else if (m_remaining_method_names > 0) {
+             m_remaining_method_names--;
+         } else {
+             m_allow_assignment_method = false;
+         }
+         break;
+     }
+     return token;
+ }
+
+ bool is_name_start_char(char c) {
+     if (!c) return false;
+     return (c >= 'a' && c <= 'z') || c == '_' || (unsigned int)c >= 128;
  }
 
  bool is_identifier_char(char c) {
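The counters added to `next_token` above (`m_remaining_method_names`, `m_allow_assignment_method`, `m_method_name_separator`) track whether the lexer sits where a method name is expected: after `def`, `.`, `::` or `alias`, and anywhere inside an `undef` list. In those positions operators, keywords and backticks are emitted as `OperatorName` tokens instead of ordinary operator tokens. Roughly the kind of source this is aimed at (a sketch of constructs from the changelog, not the gem's own tests):

```ruby
class Wrapper
  def +@        # unary operators are legal method names after `def`
    self
  end

  def !         # `def !`
    false
  end

  def `(cmd)    # backtick as a method name
    "pretending to run #{cmd}"
  end

  attr_writer :value

  alias plus +@         # alias of a unary operator method
  alias result= value=  # alias of an assignment method
  undef plus, !         # undef list with operator names
end
```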
@@ -204,10 +244,10 @@ Token Lexer::build_next_token() {
204
244
  advance();
205
245
  return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
206
246
  case '@':
207
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
247
+ if (m_remaining_method_names > 0) {
208
248
  advance();
209
249
  SharedPtr<String> lit = new String("+@");
210
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
250
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
211
251
  } else {
212
252
  return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
213
253
  }
@@ -224,10 +264,10 @@ Token Lexer::build_next_token() {
224
264
  advance();
225
265
  return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
226
266
  case '@':
227
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
267
+ if (m_remaining_method_names > 0) {
228
268
  advance();
229
269
  SharedPtr<String> lit = new String("-@");
230
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
270
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
231
271
  } else {
232
272
  return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
233
273
  }
@@ -256,17 +296,20 @@ Token Lexer::build_next_token() {
256
296
  advance();
257
297
  if (!m_last_token)
258
298
  return consume_regexp('/', '/');
299
+ if (m_remaining_method_names > 0)
300
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
259
301
  switch (m_last_token.type()) {
260
302
  case Token::Type::Comma:
261
303
  case Token::Type::Doc:
304
+ case Token::Type::Equal:
262
305
  case Token::Type::LBracket:
263
306
  case Token::Type::LCurlyBrace:
264
307
  case Token::Type::LParen:
265
308
  case Token::Type::Match:
266
309
  case Token::Type::Newline:
310
+ case Token::Type::Not:
311
+ case Token::Type::Pipe:
267
312
  return consume_regexp('/', '/');
268
- case Token::Type::DefKeyword:
269
- return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
270
313
  default: {
271
314
  switch (current_char()) {
272
315
  case ' ':
@@ -295,216 +338,26 @@ Token Lexer::build_next_token() {
295
338
  advance();
296
339
  return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
297
340
  case 'q':
298
- switch (peek()) {
299
- case '[':
300
- advance(2);
301
- return consume_single_quoted_string('[', ']');
302
- case '{':
303
- advance(2);
304
- return consume_single_quoted_string('{', '}');
305
- case '<':
306
- advance(2);
307
- return consume_single_quoted_string('<', '>');
308
- case '(':
309
- advance(2);
310
- return consume_single_quoted_string('(', ')');
311
- default: {
312
- char c = peek();
313
- if (char_can_be_string_or_regexp_delimiter(c)) {
314
- advance(2);
315
- return consume_single_quoted_string(c, c);
316
- } else {
317
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
318
- }
319
- }
320
- }
341
+ return consume_percent_string(&Lexer::consume_single_quoted_string);
321
342
  case 'Q':
322
- switch (peek()) {
323
- case '[':
324
- advance(2);
325
- return consume_double_quoted_string('[', ']');
326
- case '{':
327
- advance(2);
328
- return consume_double_quoted_string('{', '}');
329
- case '<':
330
- advance(2);
331
- return consume_double_quoted_string('<', '>');
332
- case '(':
333
- advance(2);
334
- return consume_double_quoted_string('(', ')');
335
- default: {
336
- char c = peek();
337
- if (char_can_be_string_or_regexp_delimiter(c)) {
338
- advance(2);
339
- return consume_double_quoted_string(c, c);
340
- } else {
341
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
342
- }
343
- }
344
- }
343
+ return consume_percent_string(&Lexer::consume_interpolated_string);
345
344
  case 'r':
346
- switch (peek()) {
347
- case '[':
348
- advance(2);
349
- return consume_regexp('[', ']');
350
- case '{':
351
- advance(2);
352
- return consume_regexp('{', '}');
353
- case '(':
354
- advance(2);
355
- return consume_regexp('(', ')');
356
- case '<':
357
- advance(2);
358
- return consume_regexp('<', '>');
359
- default: {
360
- char c = peek();
361
- if (char_can_be_string_or_regexp_delimiter(c)) {
362
- advance(2);
363
- return consume_regexp(c, c);
364
- } else {
365
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
366
- }
367
- }
368
- }
345
+ return consume_percent_string(&Lexer::consume_regexp);
346
+ case 's':
347
+ return consume_percent_string(&Lexer::consume_percent_symbol);
369
348
  case 'x':
370
- switch (peek()) {
371
- case '/': {
372
- advance(2);
373
- return consume_double_quoted_string('/', '/', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
374
- }
375
- case '[': {
376
- advance(2);
377
- return consume_double_quoted_string('[', ']', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
378
- }
379
- case '{': {
380
- advance(2);
381
- return consume_double_quoted_string('{', '}', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
382
- }
383
- case '(': {
384
- advance(2);
385
- return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
386
- }
387
- default:
388
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
389
- }
349
+ return consume_percent_string(&Lexer::consume_interpolated_shell);
390
350
  case 'w':
391
- switch (peek()) {
392
- case '/':
393
- case '|': {
394
- char c = next();
395
- advance();
396
- return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerW);
397
- }
398
- case '[':
399
- advance(2);
400
- return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerW);
401
- case '{':
402
- advance(2);
403
- return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerW);
404
- case '<':
405
- advance(2);
406
- return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerW);
407
- case '(':
408
- advance(2);
409
- return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
410
- default:
411
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
412
- }
351
+ return consume_percent_string(&Lexer::consume_percent_lower_w);
413
352
  case 'W':
414
- switch (peek()) {
415
- case '/':
416
- case '|': {
417
- char c = next();
418
- advance();
419
- return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperW);
420
- }
421
- case '[':
422
- advance(2);
423
- return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperW);
424
- case '{':
425
- advance(2);
426
- return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperW);
427
- case '<':
428
- advance(2);
429
- return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperW);
430
- case '(':
431
- advance(2);
432
- return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
433
- default:
434
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
435
- }
353
+ return consume_percent_string(&Lexer::consume_percent_upper_w);
436
354
  case 'i':
437
- switch (peek()) {
438
- case '|':
439
- case '/': {
440
- char c = next();
441
- advance();
442
- return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerI);
443
- }
444
- case '[':
445
- advance(2);
446
- return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerI);
447
- case '{':
448
- advance(2);
449
- return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerI);
450
- case '<':
451
- advance(2);
452
- return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerI);
453
- case '(':
454
- advance(2);
455
- return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
456
- default:
457
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
458
- }
355
+ return consume_percent_string(&Lexer::consume_percent_lower_i);
459
356
  case 'I':
460
- switch (peek()) {
461
- case '|':
462
- case '/': {
463
- char c = next();
464
- advance();
465
- return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperI);
466
- }
467
- case '[':
468
- advance(2);
469
- return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperI);
470
- case '{':
471
- advance(2);
472
- return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperI);
473
- case '<':
474
- advance(2);
475
- return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperI);
476
- case '(':
477
- advance(2);
478
- return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
479
- default:
480
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
481
- }
482
- case '[':
483
- advance();
484
- return consume_double_quoted_string('[', ']');
485
- case '{':
486
- advance();
487
- return consume_double_quoted_string('{', '}');
488
- case '<':
489
- advance();
490
- return consume_double_quoted_string('<', '>');
491
- case '(':
492
- if (m_last_token.type() == Token::Type::DefKeyword || m_last_token.type() == Token::Type::Dot) {
493
- // It's a trap! This looks like a %(string) but it's a method def/call!
494
- break;
495
- }
496
- advance();
497
- return consume_double_quoted_string('(', ')');
498
- default: {
499
- auto c = current_char();
500
- if (char_can_be_string_or_regexp_delimiter(c)) {
501
- advance();
502
- return consume_double_quoted_string(c, c);
503
- }
504
- break;
505
- }
357
+ return consume_percent_string(&Lexer::consume_percent_upper_i);
358
+ default:
359
+ return consume_percent_string(&Lexer::consume_interpolated_string, false);
506
360
  }
507
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
508
361
  case '!':
509
362
  advance();
510
363
  switch (current_char()) {
@@ -515,10 +368,10 @@ Token Lexer::build_next_token() {
515
368
  advance();
516
369
  return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column, m_whitespace_precedes };
517
370
  case '@':
518
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
371
+ if (m_remaining_method_names > 0) {
519
372
  advance();
520
373
  SharedPtr<String> lit = new String("!@");
521
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
374
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
522
375
  } else {
523
376
  return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
524
377
  }
@@ -653,10 +506,10 @@ Token Lexer::build_next_token() {
653
506
  advance();
654
507
  switch (current_char()) {
655
508
  case '@':
656
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
509
+ if (m_remaining_method_names > 0) {
657
510
  advance();
658
511
  SharedPtr<String> lit = new String("~@");
659
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
512
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
660
513
  } else {
661
514
  return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
662
515
  }
@@ -665,7 +518,7 @@ Token Lexer::build_next_token() {
665
518
  }
666
519
  case '?': {
667
520
  auto c = next();
668
- if (isspace(c)) {
521
+ if (isspace(c) || c == 0) {
669
522
  m_open_ternary = true;
670
523
  return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column, m_whitespace_precedes };
671
524
  } else {
@@ -695,7 +548,7 @@ Token Lexer::build_next_token() {
695
548
  advance();
696
549
  auto string = consume_single_quoted_string('\'', '\'');
697
550
  return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
698
- } else if (isspace(c)) {
551
+ } else if (isspace(c) || c == 0) {
699
552
  m_open_ternary = false;
700
553
  auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
701
554
  return token;
@@ -793,13 +646,18 @@ Token Lexer::build_next_token() {
793
646
  return Token { Token::Type::Comma, m_file, m_token_line, m_token_column, m_whitespace_precedes };
794
647
  case '"':
795
648
  advance();
796
- return consume_double_quoted_string('"', '"');
649
+ return consume_interpolated_string('"', '"');
797
650
  case '\'':
798
651
  advance();
799
652
  return consume_single_quoted_string('\'', '\'');
800
653
  case '`': {
801
654
  advance();
802
- return consume_double_quoted_string('`', '`', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
655
+ if (m_remaining_method_names > 0) {
656
+ SharedPtr<String> lit = new String("`");
657
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
658
+ } else {
659
+ return consume_interpolated_shell('`', '`');
660
+ }
803
661
  }
804
662
  case '#':
805
663
  if (token_is_first_on_line()) {
@@ -862,14 +720,14 @@ Token Lexer::build_next_token() {
862
720
 
863
721
  Token keyword_token;
864
722
 
865
- if (!m_last_token.is_dot() && match(4, "self")) {
866
- if (current_char() == '.')
723
+ if (!m_last_token.is_dot() && !m_last_token.is_constant_resolution() && match(4, "self")) {
724
+ if (current_char() == '.' || (current_char() == ':' && peek() == ':'))
867
725
  keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
868
726
  else
869
727
  rewind(4);
870
728
  }
871
729
 
872
- if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
730
+ if (m_remaining_method_names == 0) {
873
731
  if (match(12, "__ENCODING__"))
874
732
  keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
875
733
  else if (match(8, "__LINE__"))
@@ -964,10 +822,10 @@ Token Lexer::build_next_token() {
964
822
  }
965
823
 
966
824
  auto c = current_char();
967
- if ((c >= 'a' && c <= 'z') || c == '_') {
968
- return consume_bare_name();
825
+ if (is_name_start_char(c)) {
826
+ return consume_bare_name_or_constant(Token::Type::BareName);
969
827
  } else if (c >= 'A' && c <= 'Z') {
970
- return consume_constant();
828
+ return consume_bare_name_or_constant(Token::Type::Constant);
971
829
  } else {
972
830
  auto buf = consume_non_whitespace();
973
831
  auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
@@ -1097,45 +955,47 @@ Token Lexer::consume_symbol() {
1097
955
  return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1098
956
  }
1099
957
 
1100
- Token Lexer::consume_word(Token::Type type) {
958
+ SharedPtr<String> Lexer::consume_word() {
1101
959
  char c = current_char();
1102
960
  SharedPtr<String> buf = new String("");
1103
961
  do {
1104
962
  buf->append_char(c);
1105
963
  c = next();
1106
964
  } while (is_identifier_char(c));
965
+ return buf;
966
+ }
967
+
968
+ Token Lexer::consume_word(Token::Type type) {
969
+ return Token { type, consume_word(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
970
+ }
971
+
972
+ Token Lexer::consume_bare_name_or_constant(Token::Type type) {
973
+ auto buf = consume_word();
974
+ auto c = current_char();
1107
975
  switch (c) {
1108
976
  case '?':
1109
977
  case '!':
1110
978
  advance();
1111
979
  buf->append_char(c);
1112
980
  break;
981
+ case '=':
982
+ if (m_allow_assignment_method || (!m_last_token.is_dot() && m_remaining_method_names > 0)) {
983
+ advance();
984
+ buf->append_char(c);
985
+ }
986
+ break;
987
+ case ':':
988
+ if (peek() != ':' && m_last_token.can_precede_symbol_key()) {
989
+ advance();
990
+ type = Token::Type::SymbolKey;
991
+ }
992
+ break;
1113
993
  default:
1114
994
  break;
1115
995
  }
1116
996
  return Token { type, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1117
997
  }
1118
998
 
1119
- Token Lexer::consume_bare_name() {
1120
- auto token = consume_word(Token::Type::BareName);
1121
- auto c = current_char();
1122
- if (c == ':' && peek() != ':' && m_last_token.can_precede_symbol_key()) {
1123
- advance();
1124
- token.set_type(Token::Type::SymbolKey);
1125
- }
1126
- return token;
1127
- }
1128
-
1129
- Token Lexer::consume_constant() {
1130
- auto token = consume_word(Token::Type::Constant);
1131
- auto c = current_char();
1132
- if (c == ':' && peek() != ':' && m_last_token.can_precede_symbol_key()) {
1133
- advance();
1134
- token.set_type(Token::Type::SymbolKey);
1135
- }
1136
- return token;
1137
- }
1138
-
1139
999
  Token Lexer::consume_global_variable() {
1140
1000
  switch (peek()) {
1141
1001
  case '?':
@@ -1157,7 +1017,6 @@ Token Lexer::consume_global_variable() {
1157
1017
  case '.':
1158
1018
  case ',':
1159
1019
  case ':':
1160
- case '_':
1161
1020
  case '~': {
1162
1021
  advance();
1163
1022
  SharedPtr<String> buf = new String("$");
@@ -1281,7 +1140,7 @@ Token Lexer::consume_heredoc() {
1281
1140
  }
1282
1141
  advance();
1283
1142
  } else {
1284
- heredoc_name = String(consume_word(Token::Type::BareName).literal());
1143
+ heredoc_name = *consume_word();
1285
1144
  }
1286
1145
 
1287
1146
  SharedPtr<String> doc = new String("");
@@ -1677,7 +1536,7 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
1677
1536
  SharedPtr<String> buf = new String("");
1678
1537
  char c = current_char();
1679
1538
  while (c) {
1680
- if (c == '\\') {
1539
+ if (c == '\\' && stop_char != '\\') {
1681
1540
  c = next();
1682
1541
  if (c == stop_char || c == '\\') {
1683
1542
  buf->append_char(c);
@@ -1724,6 +1583,65 @@ Token Lexer::consume_regexp(char start_char, char stop_char) {
1724
1583
  return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1725
1584
  }
1726
1585
 
1586
+ Token Lexer::consume_percent_symbol(char start_char, char stop_char) {
1587
+ Token token = consume_single_quoted_string(start_char, stop_char);
1588
+ token.set_type(Token::Type::Symbol);
1589
+ return token;
1590
+ }
1591
+
1592
+ Token Lexer::consume_interpolated_string(char start_char, char stop_char) {
1593
+ return consume_double_quoted_string(start_char, stop_char, Token::Type::InterpolatedStringBegin, Token::Type::InterpolatedStringEnd);
1594
+ }
1595
+
1596
+ Token Lexer::consume_interpolated_shell(char start_char, char stop_char) {
1597
+ return consume_double_quoted_string(start_char, stop_char, Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
1598
+ }
1599
+
1600
+ Token Lexer::consume_percent_lower_w(char start_char, char stop_char) {
1601
+ return consume_quoted_array_without_interpolation(start_char, stop_char, Token::Type::PercentLowerW);
1602
+ }
1603
+
1604
+ Token Lexer::consume_percent_upper_w(char start_char, char stop_char) {
1605
+ return consume_quoted_array_with_interpolation(start_char, stop_char, Token::Type::PercentUpperW);
1606
+ }
1607
+
1608
+ Token Lexer::consume_percent_lower_i(char start_char, char stop_char) {
1609
+ return consume_quoted_array_without_interpolation(start_char, stop_char, Token::Type::PercentLowerI);
1610
+ }
1611
+
1612
+ Token Lexer::consume_percent_upper_i(char start_char, char stop_char) {
1613
+ return consume_quoted_array_with_interpolation(start_char, stop_char, Token::Type::PercentUpperI);
1614
+ }
1615
+
1616
+ Token Lexer::consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered) {
1617
+ if (m_remaining_method_names > 0) {
1618
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1619
+ }
1620
+ char c = is_lettered ? peek() : current_char();
1621
+ size_t bytes = is_lettered ? 2 : 1;
1622
+ switch (c) {
1623
+ case '[':
1624
+ advance(bytes);
1625
+ return (this->*consumer)('[', ']');
1626
+ case '{':
1627
+ advance(bytes);
1628
+ return (this->*consumer)('{', '}');
1629
+ case '<':
1630
+ advance(bytes);
1631
+ return (this->*consumer)('<', '>');
1632
+ case '(':
1633
+ advance(bytes);
1634
+ return (this->*consumer)('(', ')');
1635
+ default:
1636
+ if (char_can_be_string_or_regexp_delimiter(c)) {
1637
+ advance(bytes);
1638
+ return (this->*consumer)(c, c);
1639
+ } else {
1640
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1641
+ }
1642
+ }
1643
+ }
1644
+
1727
1645
  SharedPtr<String> Lexer::consume_non_whitespace() {
1728
1646
  char c = current_char();
1729
1647
  SharedPtr<String> buf = new String("");
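The eight near-identical `switch (peek())` blocks for `%q`, `%Q`, `%r`, `%x`, `%w`, `%W`, `%i` and `%I` are collapsed into the single `consume_percent_string` helper above, a `%s` symbol case is added, and the delimiter predicate now also accepts `;`, `=` and `\`. In Ruby terms, percent literals like these should now tokenize (illustrative examples, not from the gem's tests):

```ruby
sym     = %s(hello)        # %s(...) symbol literal => :hello
note    = %q;semicolons;   # ';' accepted as a percent-literal delimiter
pattern = %r=foo/bar=      # '=' accepted too, handy for slash-heavy regexps
```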
data/src/parser.cpp CHANGED
@@ -46,7 +46,7 @@ enum class Parser::Precedence {
46
46
  REF, // foo[1] / foo[1] = 2
47
47
  };
48
48
 
49
- bool Parser::higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence) {
49
+ bool Parser::higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow) {
50
50
  auto next_precedence = get_precedence(token, left);
51
51
 
52
52
  // printf("token %d, left %d, current_precedence %d, next_precedence %d\n", (int)token.type(), (int)left->type(), (int)current_precedence, (int)next_precedence);
@@ -83,11 +83,11 @@ bool Parser::higher_precedence(Token &token, SharedPtr<Node> left, Precedence cu
83
83
  // NOTE: `m_call_depth` should probably be called
84
84
  // `m_call_that_can_accept_a_block_depth`, but that's a bit long.
85
85
  //
86
- return m_call_depth.last() == 0;
86
+ return iter_allow == IterAllow::CURLY_AND_BLOCK && m_call_depth.last() == 0;
87
87
  }
88
88
 
89
89
  if (next_precedence == Precedence::ITER_CURLY)
90
- return left->is_callable();
90
+ return iter_allow >= IterAllow::CURLY_ONLY && left->is_callable();
91
91
 
92
92
  return next_precedence > current_precedence;
93
93
  }
@@ -198,7 +198,7 @@ Parser::Precedence Parser::get_precedence(Token &token, SharedPtr<Node> left) {
198
198
  return Precedence::LOWEST;
199
199
  }
200
200
 
201
- SharedPtr<Node> Parser::parse_expression(Parser::Precedence precedence, LocalsHashmap &locals) {
201
+ SharedPtr<Node> Parser::parse_expression(Parser::Precedence precedence, LocalsHashmap &locals, IterAllow iter_allow) {
202
202
  skip_newlines();
203
203
 
204
204
  m_precedence_stack.push(precedence);
@@ -211,7 +211,7 @@ SharedPtr<Node> Parser::parse_expression(Parser::Precedence precedence, LocalsHa
211
211
 
212
212
  while (current_token().is_valid()) {
213
213
  auto token = current_token();
214
- if (!higher_precedence(token, left, precedence))
214
+ if (!higher_precedence(token, left, precedence, iter_allow))
215
215
  break;
216
216
  auto left_fn = left_denotation(token, left, precedence);
217
217
  if (!left_fn)
@@ -278,44 +278,47 @@ SharedPtr<BlockNode> Parser::parse_def_body(LocalsHashmap &locals) {
278
278
  return parse_body(locals, Precedence::LOWEST, Token::Type::EndKeyword, true);
279
279
  }
280
280
 
281
+ void Parser::reinsert_collapsed_newline() {
282
+ auto token = previous_token();
283
+ if (token.can_precede_collapsible_newline()) {
284
+ // Some operators at the end of a line cause the newlines to be collapsed:
285
+ //
286
+ // foo <<
287
+ // bar
288
+ //
289
+ // ...but in this case (an alias), collapsing the newline was a mistake:
290
+ //
291
+ // alias foo <<
292
+ // def bar; end
293
+ //
294
+ // So, we'll put the newline back.
295
+ m_tokens->insert(m_index, Token { Token::Type::Newline, token.file(), token.line(), token.column(), token.whitespace_precedes() });
296
+ }
297
+ }
298
+
281
299
  SharedPtr<Node> Parser::parse_alias(LocalsHashmap &locals) {
282
300
  auto token = current_token();
283
301
  advance();
284
- SharedPtr<SymbolNode> new_name = parse_alias_arg(locals, "alias new name (first argument)", false);
285
- auto existing_name = parse_alias_arg(locals, "alias existing name (second argument)", true);
302
+ auto new_name = parse_alias_arg(locals, "alias new name (first argument)");
303
+ auto existing_name = parse_alias_arg(locals, "alias existing name (second argument)");
304
+ reinsert_collapsed_newline();
286
305
  return new AliasNode { token, new_name, existing_name };
287
306
  }
288
307
 
289
- SharedPtr<SymbolNode> Parser::parse_alias_arg(LocalsHashmap &locals, const char *expected_message, bool reinsert_collapsed_newline) {
308
+ SharedPtr<SymbolNode> Parser::parse_alias_arg(LocalsHashmap &locals, const char *expected_message) {
290
309
  auto token = current_token();
291
310
  switch (token.type()) {
292
- // TODO: handle Constant too
293
- case Token::Type::BareName: {
294
- advance();
295
- return new SymbolNode { token, token.literal_string() };
296
- }
311
+ case Token::Type::BareName:
312
+ case Token::Type::Constant:
313
+ case Token::Type::OperatorName:
314
+ return new SymbolNode { token, parse_method_name(locals) };
297
315
  case Token::Type::Symbol:
298
316
  return parse_symbol(locals).static_cast_as<SymbolNode>();
299
317
  case Token::Type::InterpolatedSymbolBegin:
300
318
  return parse_interpolated_symbol(locals).static_cast_as<SymbolNode>();
301
319
  default:
302
320
  if (token.is_operator() || token.is_keyword()) {
303
- advance();
304
- if (token.can_precede_collapsible_newline() && reinsert_collapsed_newline) {
305
- // Some operators at the end of a line cause the newlines to be collapsed:
306
- //
307
- // foo <<
308
- // bar
309
- //
310
- // ...but in this case (an alias), collapsing the newline was a mistake:
311
- //
312
- // alias foo <<
313
- // def bar; end
314
- //
315
- // So, we'll put the newline back.
316
- m_tokens->insert(m_index, Token { Token::Type::Newline, token.file(), token.line(), token.column(), token.whitespace_precedes() });
317
- }
318
- return new SymbolNode { token, new String(token.type_value()) };
321
+ return new SymbolNode { token, parse_method_name(locals) };
319
322
  } else {
320
323
  throw_unexpected(expected_message);
321
324
  }
@@ -501,7 +504,7 @@ SharedPtr<Node> Parser::parse_beginless_range(LocalsHashmap &locals) {
501
504
  SharedPtr<Node> Parser::parse_block_pass(LocalsHashmap &locals) {
502
505
  auto token = current_token();
503
506
  advance();
504
- auto value = parse_expression(Precedence::UNARY_PLUS, locals);
507
+ auto value = parse_expression(Precedence::LOWEST, locals);
505
508
  return new BlockPassNode { token, value };
506
509
  }
507
510
 
@@ -865,15 +868,19 @@ SharedPtr<BlockNode> Parser::parse_case_when_body(LocalsHashmap &locals) {
865
868
  }
866
869
 
867
870
  SharedPtr<Node> Parser::parse_class_or_module_name(LocalsHashmap &locals) {
868
- Token name_token;
869
- if (current_token().type() == Token::Type::ConstantResolution) {
870
- name_token = peek_token();
871
- } else {
872
- name_token = current_token();
873
- }
874
- if (name_token.type() != Token::Type::Constant)
871
+ auto name_token = current_token();
872
+ auto exp = parse_expression(Precedence::LESS_GREATER, locals);
873
+ switch (exp->type()) {
874
+ case Node::Type::Colon2:
875
+ case Node::Type::Colon3:
876
+ return exp;
877
+ case Node::Type::Identifier:
878
+ if (name_token.type() == Token::Type::Constant)
879
+ return exp;
880
+ [[fallthrough]];
881
+ default:
875
882
  throw SyntaxError { "class/module name must be CONSTANT" };
876
- return parse_expression(Precedence::LESS_GREATER, locals);
883
+ }
877
884
  }
878
885
 
879
886
  SharedPtr<Node> Parser::parse_class(LocalsHashmap &locals) {
@@ -991,21 +998,24 @@ SharedPtr<Node> Parser::parse_def(LocalsHashmap &locals) {
991
998
  auto token = current_token();
992
999
  switch (token.type()) {
993
1000
  case Token::Type::BareName:
994
- if (peek_token().type() == Token::Type::Dot) {
1001
+ if (peek_token().is_dot() || peek_token().is_constant_resolution()) {
995
1002
  self_node = parse_identifier(locals);
996
1003
  advance(); // dot
997
1004
  }
998
1005
  name = parse_method_name(locals);
999
1006
  break;
1000
1007
  case Token::Type::Constant:
1001
- if (peek_token().type() == Token::Type::Dot) {
1008
+ if (peek_token().is_dot() || peek_token().is_constant_resolution()) {
1002
1009
  self_node = parse_constant(locals);
1003
1010
  advance(); // dot
1004
1011
  }
1005
1012
  name = parse_method_name(locals);
1006
1013
  break;
1014
+ case Token::Type::OperatorName:
1015
+ name = parse_method_name(locals);
1016
+ break;
1007
1017
  case Token::Type::SelfKeyword:
1008
- if (peek_token().type() == Token::Type::Dot) {
1018
+ if (peek_token().is_dot() || peek_token().is_constant_resolution()) {
1009
1019
  self_node = new SelfNode { current_token() };
1010
1020
  advance(); // self
1011
1021
  advance(); // dot
@@ -1023,10 +1033,6 @@ SharedPtr<Node> Parser::parse_def(LocalsHashmap &locals) {
1023
1033
  }
1024
1034
  }
1025
1035
  }
1026
- if (current_token().is_equal() && !current_token().whitespace_precedes()) {
1027
- advance();
1028
- name->append_char('=');
1029
- }
1030
1036
  auto args = Vector<SharedPtr<Node>> {};
1031
1037
  if (current_token().is_lparen()) {
1032
1038
  advance();
@@ -1037,7 +1043,7 @@ SharedPtr<Node> Parser::parse_def(LocalsHashmap &locals) {
1037
1043
  expect(Token::Type::RParen, "args closing paren");
1038
1044
  advance();
1039
1045
  }
1040
- } else if (current_token().is_bare_name() || current_token().is_splat() || current_token().is_symbol_key()) {
1046
+ } else if (current_token().can_be_first_arg_of_def()) {
1041
1047
  parse_def_args(args, our_locals);
1042
1048
  }
1043
1049
  SharedPtr<BlockNode> body;
@@ -1085,7 +1091,21 @@ void Parser::parse_def_args(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals
1085
1091
  }
1086
1092
  }
1087
1093
 
1088
- void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals, ArgsContext context) {
1094
+ SharedPtr<Node> Parser::parse_arg_default_value(LocalsHashmap &locals, IterAllow iter_allow) {
1095
+ auto token = current_token();
1096
+ if (token.is_bare_name() && peek_token().is_equal()) {
1097
+ SharedPtr<ArgNode> arg = new ArgNode { token, token.literal_string() };
1098
+ advance();
1099
+ advance(); // =
1100
+ arg->add_to_locals(locals);
1101
+ arg->set_value(parse_arg_default_value(locals, iter_allow));
1102
+ return arg.static_cast_as<Node>();
1103
+ } else {
1104
+ return parse_expression(Precedence::DEF_ARG, locals, iter_allow);
1105
+ }
1106
+ }
1107
+
1108
+ void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals, ArgsContext context, IterAllow iter_allow) {
1089
1109
  auto args_have_any_splat = [&]() { return !args.is_empty() && args.last()->type() == Node::Type::Arg && args.last().static_cast_as<ArgNode>()->splat_or_kwsplat(); };
1090
1110
  auto args_have_keyword = [&]() { return !args.is_empty() && args.last()->type() == Node::Type::KeywordArg; };
1091
1111
 
@@ -1105,7 +1125,7 @@ void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &
1105
1125
  if (args_have_any_splat())
1106
1126
  throw_error(token, "default value after splat");
1107
1127
  advance(); // =
1108
- arg->set_value(parse_expression(Precedence::DEF_ARG, locals));
1128
+ arg->set_value(parse_arg_default_value(locals, iter_allow));
1109
1129
  }
1110
1130
  args.push(arg.static_cast_as<Node>());
1111
1131
  return;
@@ -1176,8 +1196,12 @@ void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &
1176
1196
  case Token::Type::RParen:
1177
1197
  case Token::Type::Semicolon:
1178
1198
  break;
1199
+ case Token::Type::LCurlyBrace:
1200
+ if (iter_allow < IterAllow::CURLY_ONLY)
1201
+ break;
1202
+ [[fallthrough]];
1179
1203
  default:
1180
- arg->set_value(parse_expression(Precedence::DEF_ARG, locals));
1204
+ arg->set_value(parse_expression(Precedence::DEF_ARG, locals, iter_allow));
1181
1205
  }
1182
1206
  arg->add_to_locals(locals);
1183
1207
  args.push(arg.static_cast_as<Node>());
@@ -1263,6 +1287,12 @@ SharedPtr<Node> Parser::parse_file_constant(LocalsHashmap &) {
1263
1287
  return new StringNode { token, token.file() };
1264
1288
  }
1265
1289
 
1290
+ SharedPtr<Node> Parser::parse_line_constant(LocalsHashmap &) {
1291
+ auto token = current_token();
1292
+ advance();
1293
+ return new FixnumNode { token, static_cast<long long>(token.line() + 1) };
1294
+ }
1295
+
1266
1296
  SharedPtr<Node> Parser::parse_for(LocalsHashmap &locals) {
1267
1297
  auto token = current_token();
1268
1298
  advance();
@@ -1272,7 +1302,10 @@ SharedPtr<Node> Parser::parse_for(LocalsHashmap &locals) {
1272
1302
  }
1273
1303
  expect(Token::Type::InKeyword, "for in");
1274
1304
  advance();
1275
- auto expr = parse_expression(Precedence::LOWEST, locals);
1305
+ auto expr = parse_expression(Precedence::LOWEST, locals, IterAllow::CURLY_ONLY);
1306
+ if (current_token().type() == Token::Type::DoKeyword) {
1307
+ advance();
1308
+ }
1276
1309
  auto body = parse_body(locals, Precedence::LOWEST);
1277
1310
  expect(Token::Type::EndKeyword, "for end");
1278
1311
  advance();
@@ -1690,13 +1723,15 @@ SharedPtr<String> Parser::parse_method_name(LocalsHashmap &) {
1690
1723
  switch (token.type()) {
1691
1724
  case Token::Type::BareName:
1692
1725
  case Token::Type::Constant:
1726
+ case Token::Type::OperatorName:
1693
1727
  name = current_token().literal_string();
1694
1728
  break;
1695
1729
  default:
1696
- if (token.is_operator() || token.is_keyword())
1730
+ if (token.is_operator() || token.is_keyword()) {
1697
1731
  name = new String(current_token().type_value());
1698
- else
1732
+ } else {
1699
1733
  throw_unexpected("method name");
1734
+ }
1700
1735
  }
1701
1736
  advance();
1702
1737
  return name;
@@ -1766,15 +1801,15 @@ SharedPtr<Node> Parser::parse_nth_ref(LocalsHashmap &) {
1766
1801
  return new NthRefNode { token, token.get_fixnum() };
1767
1802
  }
1768
1803
 
1769
- void Parser::parse_proc_args(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals) {
1804
+ void Parser::parse_proc_args(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals, IterAllow iter_allow) {
1770
1805
  if (current_token().is_semicolon()) {
1771
1806
  parse_shadow_variables_in_args(args, locals);
1772
1807
  return;
1773
1808
  }
1774
- parse_def_single_arg(args, locals, ArgsContext::Proc);
1809
+ parse_def_single_arg(args, locals, ArgsContext::Proc, iter_allow);
1775
1810
  while (current_token().is_comma()) {
1776
1811
  advance();
1777
- parse_def_single_arg(args, locals, ArgsContext::Proc);
1812
+ parse_def_single_arg(args, locals, ArgsContext::Proc, iter_allow);
1778
1813
  }
1779
1814
  if (current_token().is_semicolon()) {
1780
1815
  parse_shadow_variables_in_args(args, locals);
@@ -1887,13 +1922,13 @@ SharedPtr<Node> Parser::parse_stabby_proc(LocalsHashmap &locals) {
1887
1922
  if (current_token().is_rparen()) {
1888
1923
  advance(); // )
1889
1924
  } else {
1890
- parse_proc_args(args, locals);
1925
+ parse_proc_args(args, locals, IterAllow::CURLY_AND_BLOCK);
1891
1926
  expect(Token::Type::RParen, "proc args closing paren");
1892
1927
  advance(); // )
1893
1928
  }
1894
- } else if (current_token().is_bare_name() || current_token().type() == Token::Type::Star) {
1929
+ } else if (current_token().can_be_first_arg_of_def()) {
1895
1930
  has_args = true;
1896
- parse_proc_args(args, locals);
1931
+ parse_proc_args(args, locals, IterAllow::NONE);
1897
1932
  }
1898
1933
  if (current_token().type() != Token::Type::DoKeyword && current_token().type() != Token::Type::LCurlyBrace)
1899
1934
  throw_unexpected("block");
@@ -2065,36 +2100,23 @@ SharedPtr<Node> Parser::parse_unary_operator(LocalsHashmap &locals) {
2065
2100
  SharedPtr<Node> Parser::parse_undef(LocalsHashmap &locals) {
2066
2101
  auto undef_token = current_token();
2067
2102
  advance();
2068
- auto symbol_from_token = [&](Token &token) -> SharedPtr<Node> {
2069
- switch (token.type()) {
2070
- case Token::Type::BareName:
2071
- case Token::Type::Constant:
2072
- advance();
2073
- return new SymbolNode { token, token.literal_string() };
2074
- case Token::Type::Symbol:
2075
- return parse_symbol(locals);
2076
- case Token::Type::InterpolatedSymbolBegin: {
2077
- return parse_interpolated_symbol(locals);
2078
- }
2079
- default:
2080
- throw_unexpected("method name for undef");
2081
- }
2082
- };
2083
2103
  SharedPtr<UndefNode> undef_node = new UndefNode { undef_token };
2084
- auto token = current_token();
2085
- undef_node->add_arg(symbol_from_token(token));
2104
+ auto arg = parse_alias_arg(locals, "method name for undef");
2105
+ undef_node->add_arg(arg.static_cast_as<Node>());
2086
2106
  if (current_token().is_comma()) {
2087
2107
  SharedPtr<BlockNode> block = new BlockNode { undef_token };
2088
2108
  block->add_node(undef_node.static_cast_as<Node>());
2089
2109
  while (current_token().is_comma()) {
2090
2110
  advance();
2091
- token = current_token();
2092
2111
  SharedPtr<UndefNode> undef_node = new UndefNode { undef_token };
2093
- undef_node->add_arg(symbol_from_token(token));
2112
+ auto arg = parse_alias_arg(locals, "method name for undef");
2113
+ undef_node->add_arg(arg.static_cast_as<Node>());
2094
2114
  block->add_node(undef_node.static_cast_as<Node>());
2095
2115
  }
2116
+ reinsert_collapsed_newline();
2096
2117
  return block.static_cast_as<Node>();
2097
2118
  }
2119
+ reinsert_collapsed_newline();
2098
2120
  return undef_node.static_cast_as<Node>();
2099
2121
  };
2100
2122
 
@@ -2416,6 +2438,7 @@ SharedPtr<Node> Parser::parse_constant_resolution_expression(SharedPtr<Node> lef
2416
2438
  SharedPtr<Node> node;
2417
2439
  switch (name_token.type()) {
2418
2440
  case Token::Type::BareName:
2441
+ case Token::Type::OperatorName:
2419
2442
  advance();
2420
2443
  node = new CallNode { name_token, left, name_token.literal_string() };
2421
2444
  break;
@@ -2434,7 +2457,12 @@ SharedPtr<Node> Parser::parse_constant_resolution_expression(SharedPtr<Node> lef
2434
2457
  break;
2435
2458
  }
2436
2459
  default:
2437
- throw_unexpected(name_token, ":: identifier name");
2460
+ if (name_token.is_operator() || name_token.is_keyword()) {
2461
+ advance();
2462
+ node = new CallNode { name_token, left, new String(name_token.type_value()) };
2463
+ } else {
2464
+ throw_unexpected(name_token, ":: identifier name");
2465
+ }
2438
2466
  }
2439
2467
  return node;
2440
2468
  }
@@ -2677,6 +2705,7 @@ SharedPtr<Node> Parser::parse_safe_send_expression(SharedPtr<Node> left, LocalsH
2677
2705
  break;
2678
2706
  case Token::Type::BareName:
2679
2707
  case Token::Type::Constant:
2708
+ case Token::Type::OperatorName:
2680
2709
  name = name_token.literal_string();
2681
2710
  advance();
2682
2711
  break;
@@ -2703,6 +2732,7 @@ SharedPtr<Node> Parser::parse_send_expression(SharedPtr<Node> left, LocalsHashma
2703
2732
  switch (name_token.type()) {
2704
2733
  case Token::Type::BareName:
2705
2734
  case Token::Type::Constant:
2735
+ case Token::Type::OperatorName:
2706
2736
  name = name_token.literal_string();
2707
2737
  advance();
2708
2738
  break;
@@ -2745,7 +2775,11 @@ SharedPtr<Node> Parser::parse_unless(LocalsHashmap &locals) {
2745
2775
  if (condition->type() == Node::Type::Regexp) {
2746
2776
  condition = new MatchNode { condition->token(), condition.static_cast_as<RegexpNode>() };
2747
2777
  }
2748
- next_expression();
2778
+ if (current_token().type() == Token::Type::ThenKeyword) {
2779
+ advance(); // then
2780
+ } else {
2781
+ next_expression();
2782
+ }
2749
2783
  SharedPtr<Node> false_expr = parse_if_body(locals);
2750
2784
  SharedPtr<Node> true_expr;
2751
2785
  if (current_token().is_else_keyword()) {
@@ -2762,11 +2796,15 @@ SharedPtr<Node> Parser::parse_unless(LocalsHashmap &locals) {
2762
2796
  SharedPtr<Node> Parser::parse_while(LocalsHashmap &locals) {
2763
2797
  auto token = current_token();
2764
2798
  advance();
2765
- SharedPtr<Node> condition = parse_expression(Precedence::LOWEST, locals);
2799
+ SharedPtr<Node> condition = parse_expression(Precedence::LOWEST, locals, IterAllow::CURLY_ONLY);
2766
2800
  if (condition->type() == Node::Type::Regexp) {
2767
2801
  condition = new MatchNode { condition->token(), condition.static_cast_as<RegexpNode>() };
2768
2802
  }
2769
- next_expression();
2803
+ if (current_token().type() == Token::Type::DoKeyword) {
2804
+ advance();
2805
+ } else {
2806
+ next_expression();
2807
+ }
2770
2808
  SharedPtr<BlockNode> body = parse_body(locals, Precedence::LOWEST);
2771
2809
  expect(Token::Type::EndKeyword, "while end");
2772
2810
  advance();
@@ -2824,6 +2862,8 @@ Parser::parse_null_fn Parser::null_denotation(Token::Type type) {
2824
2862
  return &Parser::parse_group;
2825
2863
  case Type::LCurlyBrace:
2826
2864
  return &Parser::parse_hash;
2865
+ case Type::LINEKeyword:
2866
+ return &Parser::parse_line_constant;
2827
2867
  case Type::BareName:
2828
2868
  case Type::ClassVariable:
2829
2869
  case Type::Constant:
@@ -2907,7 +2947,7 @@ Parser::parse_left_fn Parser::left_denotation(Token &token, SharedPtr<Node> left
2907
2947
  using Type = Token::Type;
2908
2948
  switch (token.type()) {
2909
2949
  case Type::Equal:
2910
- if (precedence == Precedence::ARRAY || precedence == Precedence::BARE_CALL_ARG || precedence == Precedence::CALL_ARG)
2950
+ if (precedence == Precedence::ARRAY || precedence == Precedence::HASH || precedence == Precedence::BARE_CALL_ARG || precedence == Precedence::CALL_ARG)
2911
2951
  return &Parser::parse_assignment_expression_without_multiple_values;
2912
2952
  else
2913
2953
  return &Parser::parse_assignment_expression;
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: natalie_parser
  version: !ruby/object:Gem::Version
-   version: 2.1.0
+   version: 2.2.0
  platform: ruby
  authors:
  - Tim Morgan
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-08-12 00:00:00.000000000 Z
+ date: 2022-10-25 00:00:00.000000000 Z
  dependencies: []
  description: NatalieParser is a zero-dependency, from-scratch, hand-written recursive
    descent parser for the Ruby Programming Language.