natalie_parser 2.1.0 → 2.2.0

This diff shows the content of publicly available package versions as released to one of the supported registries; it is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: '0598d42a8e0b83aa5a340d1615474a4c5a8e7b209032bf574a230778e4d3ce5e'
- data.tar.gz: 475cce9e2f078ab440674b48e3b99ba48f163f34b889b376cf681f74a489ce53
+ metadata.gz: 31bee2746c1e7a36eca16194e27467264dfbad5131075090e0b94c675ff150b1
+ data.tar.gz: 7f9a2a77aa2e34c56faeb740900779ff4682471cb02dbd51403eb1b7f72b13a7
  SHA512:
- metadata.gz: 59cd20f85484845036f934ca5b40c020ad35655824950c036a697bf0bc437d3ebd35eb25bacc7c6f062d096c01b58dea5a1ebd688d0d101192ee7f43b9881b25
- data.tar.gz: 53bcc9b34ab02943dda1602141ae949e4fa1dcf1c229dfeda8ef710b01d1d9d9c97137ce7e5b08e88b121025009c9a9fd71edbc77eead37f69f37b794fab02f8
+ metadata.gz: 74335871743111340267d41631f3eeae0281ea5f93bb2bb4a6536682a55f325162d428d986ba0182e14adfb27e830f9622048fb443f1561ee9611c988b07bd6b
+ data.tar.gz: 2fd301c60b32db6c074946adceaa54fca3736f6e64335d5bb68c9ad01f092bf19f1ce2810fb75e4641e4b6cf7ebf7e77165538282107f214fa116254b0d37ac0
data/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
  # Changelog
 
+ ## 2.2.0 (2022-10-24)
+
+ - FIX: Allow backreference as first arg of implicit call
+ - FIX: Exclude `?` and `!` from class/instance/global variable name
+ - FIX: Fix parsing of hashes containing assignments
+ - FIX: Parse aliases of unary operators and methods named as keywords
+ - FIX: Parse alias of keyword-like method to assignment method
+ - FIX: Parse alias/undef of assignment methods
+ - FIX: Parse backtick as method name
+ - FIX: Parse class/module definition in dynamic class/module
+ - FIX: Parse `def !`
+ - FIX: Parse `def foo::bar`
+ - FIX: Parse `foo(<<FOO)`
+ - FIX: Parse `for ... do` and `while ... do`
+ - FIX: Parse global variables beginning with `$_`
+ - FIX: Parse keywords/operators as method names for `::`
+ - FIX: Parse __LINE__ constant
+ - FIX: Parse more percent strings
+ - FIX: Parse more regexes
+ - FIX: Parse more stabby procs without parentheses
+ - FIX: Parse multiple parameters set to the same default value
+ - FIX: Parse parentheses-less stabby proc with keyword arg
+ - FIX: Parse undef of methods with special names
+ - FIX: Parse `unless ... then`
+ - FIX: Parse variable names starting with extended ASCII characters
+ - FIX: Raise SyntaxError for `def ====`
+ - FIX: Raise syntax error for `def +@.foo`
+ - FIX: Tokenize unterminated ternary at EOF
+ - FIX: Use lower precedence for block pass
+
  ## 2.1.0 (2022-08-12)
 
  - FEAT: Parse for loops
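
To make the list above concrete, here is a purely illustrative Ruby snippet (not taken from the gem's sources or test suite) using a few of the constructs that 2.2.0 now parses:

```ruby
# `for ... do` and `unless ... then` are accepted (2.2.0 fixes)
for i in 1..3 do
  puts i
end

unless i > 3 then
  puts "still counting"
end

$_count = 0   # global variables beginning with `$_` now lex correctly

"abc" =~ /b/
puts $&       # a back-reference may be the first argument of an implicit (paren-less) call
```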
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
80
80
  case NatalieParser::Token::Type::Constant:
81
81
  case NatalieParser::Token::Type::GlobalVariable:
82
82
  case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::OperatorName:
83
84
  case NatalieParser::Token::Type::Symbol:
84
85
  case NatalieParser::Token::Type::SymbolKey: {
85
86
  auto literal = token.literal_string();
@@ -70,9 +70,9 @@ protected:
70
70
  virtual bool skip_whitespace();
71
71
  virtual Token build_next_token();
72
72
  Token consume_symbol();
73
+ SharedPtr<String> consume_word();
73
74
  Token consume_word(Token::Type type);
74
- Token consume_bare_name();
75
- Token consume_constant();
75
+ Token consume_bare_name_or_constant(Token::Type type);
76
76
  Token consume_global_variable();
77
77
  Token consume_heredoc();
78
78
  Token consume_numeric();
@@ -80,11 +80,19 @@ protected:
80
80
  Token consume_nth_ref();
81
81
  long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
82
  long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
- Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
84
84
  Token consume_single_quoted_string(char, char);
85
85
  Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
86
  Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
87
  Token consume_regexp(char start_char, char stop_char);
88
+ Token consume_percent_symbol(char start_char, char stop_char);
89
+ Token consume_interpolated_string(char start_char, char stop_char);
90
+ Token consume_interpolated_shell(char start_char, char stop_char);
91
+ Token consume_percent_lower_w(char start_char, char stop_char);
92
+ Token consume_percent_upper_w(char start_char, char stop_char);
93
+ Token consume_percent_lower_i(char start_char, char stop_char);
94
+ Token consume_percent_upper_i(char start_char, char stop_char);
95
+ Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
88
96
  SharedPtr<String> consume_non_whitespace();
89
97
 
90
98
  void utf32_codepoint_to_utf8(String &buf, long long codepoint);
@@ -95,7 +103,7 @@ protected:
95
103
  bool token_is_first_on_line() const;
96
104
 
97
105
  bool char_can_be_string_or_regexp_delimiter(char c) const {
98
- return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
106
+ return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
107
  }
100
108
 
101
109
  SharedPtr<String> m_input;
@@ -131,5 +139,10 @@ protected:
131
139
  // then increment m_pair_depth
132
140
  char m_start_char { 0 };
133
141
  int m_pair_depth { 0 };
142
+
143
+ size_t m_remaining_method_names { 0 };
144
+ bool m_allow_assignment_method { false };
145
+ Token::Type m_method_name_separator { Token::Type::Invalid };
146
+ Token m_last_method_name {};
134
147
  };
135
148
  }
@@ -49,16 +49,22 @@ public:
49
49
 
50
50
  enum class Precedence;
51
51
 
52
+ enum class IterAllow {
53
+ NONE,
54
+ CURLY_ONLY,
55
+ CURLY_AND_BLOCK,
56
+ };
57
+
52
58
  SharedPtr<Node> tree();
53
59
 
54
60
  private:
55
- bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
61
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
56
62
 
57
63
  Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
64
 
59
65
  bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
66
 
61
- SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
67
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
62
68
 
63
69
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
70
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
@@ -67,8 +73,9 @@ private:
67
73
  SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
74
  SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
75
 
76
+ void reinsert_collapsed_newline();
70
77
  SharedPtr<Node> parse_alias(LocalsHashmap &);
71
- SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
78
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
72
79
  SharedPtr<Node> parse_array(LocalsHashmap &);
73
80
  SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
81
  SharedPtr<Node> parse_begin_block(LocalsHashmap &);
@@ -96,7 +103,8 @@ private:
96
103
  Method,
97
104
  Proc,
98
105
  };
99
- void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
106
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
107
+ SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
100
108
 
101
109
  SharedPtr<Node> parse_encoding(LocalsHashmap &);
102
110
  SharedPtr<Node> parse_end_block(LocalsHashmap &);
@@ -115,6 +123,7 @@ private:
115
123
  SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
116
124
  SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
117
125
  SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
126
+ SharedPtr<Node> parse_line_constant(LocalsHashmap &);
118
127
  SharedPtr<Node> parse_lit(LocalsHashmap &);
119
128
  SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
120
129
  SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
@@ -124,7 +133,7 @@ private:
124
133
  SharedPtr<Node> parse_nil(LocalsHashmap &);
125
134
  SharedPtr<Node> parse_not(LocalsHashmap &);
126
135
  SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
127
- void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
136
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
128
137
  SharedPtr<Node> parse_redo(LocalsHashmap &);
129
138
  SharedPtr<Node> parse_retry(LocalsHashmap &);
130
139
  SharedPtr<Node> parse_return(LocalsHashmap &);
@@ -103,6 +103,7 @@ public:
103
103
  NotKeyword,
104
104
  NotMatch,
105
105
  NthRef,
106
+ OperatorName,
106
107
  OrKeyword,
107
108
  Percent,
108
109
  PercentEqual,
@@ -431,6 +432,8 @@ public:
431
432
  return "!";
432
433
  case Type::NthRef:
433
434
  return "nth_ref";
435
+ case Type::OperatorName:
436
+ return "operator";
434
437
  case Type::OrKeyword:
435
438
  return "or";
436
439
  case Type::Percent:
@@ -614,6 +617,7 @@ public:
614
617
  case Token::Type::LessThanOrEqual:
615
618
  case Token::Type::Match:
616
619
  case Token::Type::Minus:
620
+ case Token::Type::Not:
617
621
  case Token::Type::NotEqual:
618
622
  case Token::Type::NotMatch:
619
623
  case Token::Type::Percent:
@@ -635,6 +639,7 @@ public:
635
639
  bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
636
640
  bool is_comma() const { return m_type == Type::Comma; }
637
641
  bool is_comment() const { return m_type == Type::Comment; }
642
+ bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
638
643
  bool is_def_keyword() const { return m_type == Type::DefKeyword; }
639
644
  bool is_doc() const { return m_type == Type::Doc; }
640
645
  bool is_dot() const { return m_type == Type::Dot; }
@@ -743,9 +748,23 @@ public:
743
748
  }
744
749
  }
745
750
 
751
+ bool can_be_first_arg_of_def() const {
752
+ switch (m_type) {
753
+ case Token::Type::Ampersand:
754
+ case Token::Type::BareName:
755
+ case Token::Type::Star:
756
+ case Token::Type::StarStar:
757
+ case Token::Type::SymbolKey:
758
+ return true;
759
+ default:
760
+ return false;
761
+ }
762
+ }
763
+
746
764
  bool can_be_first_arg_of_implicit_call() const {
747
765
  switch (m_type) {
748
766
  case Token::Type::Arrow:
767
+ case Token::Type::BackRef:
749
768
  case Token::Type::BareName:
750
769
  case Token::Type::BeginKeyword:
751
770
  case Token::Type::Bignum:
@@ -800,6 +819,7 @@ public:
800
819
  switch (m_type) {
801
820
  case Token::Type::Equal:
802
821
  case Token::Type::LBracket:
822
+ case Token::Type::LParen:
803
823
  return true;
804
824
  default:
805
825
  if (is_operator())
@@ -826,6 +846,7 @@ public:
826
846
 
827
847
  bool can_precede_symbol_key() const {
828
848
  switch (m_type) {
849
+ case Type::Arrow:
829
850
  case Type::BareName:
830
851
  case Type::Comma:
831
852
  case Type::Constant:
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  class NatalieParser
- VERSION = '2.1.0'
+ VERSION = '2.2.0'
  end
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
22
22
  Token InterpolatedStringLexer::consume_string() {
23
23
  SharedPtr<String> buf = new String;
24
24
  while (auto c = current_char()) {
25
- if (c == '\\') {
25
+ if (c == '\\' && m_stop_char != '\\') {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
38
38
  Token RegexpLexer::consume_regexp() {
39
39
  SharedPtr<String> buf = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  switch (c) {
44
44
  case '/':
@@ -38,7 +38,7 @@ Token WordArrayLexer::build_next_token() {
38
38
  Token WordArrayLexer::consume_array() {
39
39
  m_buffer = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  advance();
44
44
  if (c == ' ') {
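
All three sub-lexers above gain the `m_stop_char != '\\'` guard: when a percent literal uses a backslash as its delimiter, the backslash terminates the literal and cannot start an escape. A rough Ruby illustration, assuming MRI's handling of backslash-delimited percent literals:

```ruby
str   = %q\plain text\     # backslash-delimited %q string; no escapes are possible inside
words = %w\one two three\  # same delimiter style for a word array
str    # => "plain text"
words  # => ["one", "two", "three"]
```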
data/src/lexer.cpp CHANGED
@@ -80,7 +80,47 @@ Token Lexer::next_token() {
80
80
  m_whitespace_precedes = skip_whitespace();
81
81
  m_token_line = m_cursor_line;
82
82
  m_token_column = m_cursor_column;
83
- return build_next_token();
83
+ Token token = build_next_token();
84
+ switch (token.type()) {
85
+ case Token::Type::AliasKeyword:
86
+ m_remaining_method_names = 2;
87
+ break;
88
+ case Token::Type::ConstantResolution:
89
+ case Token::Type::DefKeyword:
90
+ m_remaining_method_names = 1;
91
+ m_allow_assignment_method = true;
92
+ break;
93
+ case Token::Type::Dot:
94
+ m_remaining_method_names = 1;
95
+ break;
96
+ case Token::Type::UndefKeyword:
97
+ m_remaining_method_names = std::numeric_limits<size_t>::max();
98
+ m_method_name_separator = Token::Type::Comma;
99
+ break;
100
+ default:
101
+ if (m_method_name_separator != Token::Type::Invalid) {
102
+ if (m_last_method_name) {
103
+ m_last_method_name = {};
104
+ if (token.type() != m_method_name_separator) {
105
+ m_remaining_method_names = 0;
106
+ m_method_name_separator = Token::Type::Invalid;
107
+ }
108
+ } else {
109
+ m_last_method_name = token;
110
+ }
111
+ } else if (m_remaining_method_names > 0) {
112
+ m_remaining_method_names--;
113
+ } else {
114
+ m_allow_assignment_method = false;
115
+ }
116
+ break;
117
+ }
118
+ return token;
119
+ }
120
+
121
+ bool is_name_start_char(char c) {
122
+ if (!c) return false;
123
+ return (c >= 'a' && c <= 'z') || c == '_' || (unsigned int)c >= 128;
84
124
  }
85
125
 
86
126
  bool is_identifier_char(char c) {
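
The new fields (`m_remaining_method_names`, `m_allow_assignment_method`, `m_method_name_separator`, `m_last_method_name`) let `next_token` remember when the upcoming token(s) occupy a method-name position (after `def`, `.`, `::`, `alias`, or `undef`), so sequences like `+@` or a bare `!` lex as `OperatorName` tokens instead of operators. A small hypothetical Ruby class exercising a few of those positions:

```ruby
class Counter
  def initialize(n = 0)
    @n = n
  end

  def -@                 # unary minus: `-@` lexes as an OperatorName after `def`
    Counter.new(-@n)
  end

  def count=(value)      # assignment method: the trailing `=` stays part of the name
    @n = value
  end

  def !                  # `def !` is one of the 2.2.0 fixes
    @n.zero?
  end
end
```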
@@ -204,10 +244,10 @@ Token Lexer::build_next_token() {
204
244
  advance();
205
245
  return Token { Token::Type::PlusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
206
246
  case '@':
207
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
247
+ if (m_remaining_method_names > 0) {
208
248
  advance();
209
249
  SharedPtr<String> lit = new String("+@");
210
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
250
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
211
251
  } else {
212
252
  return Token { Token::Type::Plus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
213
253
  }
@@ -224,10 +264,10 @@ Token Lexer::build_next_token() {
224
264
  advance();
225
265
  return Token { Token::Type::MinusEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
226
266
  case '@':
227
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
267
+ if (m_remaining_method_names > 0) {
228
268
  advance();
229
269
  SharedPtr<String> lit = new String("-@");
230
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
270
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
231
271
  } else {
232
272
  return Token { Token::Type::Minus, m_file, m_token_line, m_token_column, m_whitespace_precedes };
233
273
  }
@@ -256,17 +296,20 @@ Token Lexer::build_next_token() {
256
296
  advance();
257
297
  if (!m_last_token)
258
298
  return consume_regexp('/', '/');
299
+ if (m_remaining_method_names > 0)
300
+ return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
259
301
  switch (m_last_token.type()) {
260
302
  case Token::Type::Comma:
261
303
  case Token::Type::Doc:
304
+ case Token::Type::Equal:
262
305
  case Token::Type::LBracket:
263
306
  case Token::Type::LCurlyBrace:
264
307
  case Token::Type::LParen:
265
308
  case Token::Type::Match:
266
309
  case Token::Type::Newline:
310
+ case Token::Type::Not:
311
+ case Token::Type::Pipe:
267
312
  return consume_regexp('/', '/');
268
- case Token::Type::DefKeyword:
269
- return Token { Token::Type::Slash, m_file, m_token_line, m_token_column, m_whitespace_precedes };
270
313
  default: {
271
314
  switch (current_char()) {
272
315
  case ' ':
@@ -295,216 +338,26 @@ Token Lexer::build_next_token() {
295
338
  advance();
296
339
  return Token { Token::Type::PercentEqual, m_file, m_token_line, m_token_column, m_whitespace_precedes };
297
340
  case 'q':
298
- switch (peek()) {
299
- case '[':
300
- advance(2);
301
- return consume_single_quoted_string('[', ']');
302
- case '{':
303
- advance(2);
304
- return consume_single_quoted_string('{', '}');
305
- case '<':
306
- advance(2);
307
- return consume_single_quoted_string('<', '>');
308
- case '(':
309
- advance(2);
310
- return consume_single_quoted_string('(', ')');
311
- default: {
312
- char c = peek();
313
- if (char_can_be_string_or_regexp_delimiter(c)) {
314
- advance(2);
315
- return consume_single_quoted_string(c, c);
316
- } else {
317
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
318
- }
319
- }
320
- }
341
+ return consume_percent_string(&Lexer::consume_single_quoted_string);
321
342
  case 'Q':
322
- switch (peek()) {
323
- case '[':
324
- advance(2);
325
- return consume_double_quoted_string('[', ']');
326
- case '{':
327
- advance(2);
328
- return consume_double_quoted_string('{', '}');
329
- case '<':
330
- advance(2);
331
- return consume_double_quoted_string('<', '>');
332
- case '(':
333
- advance(2);
334
- return consume_double_quoted_string('(', ')');
335
- default: {
336
- char c = peek();
337
- if (char_can_be_string_or_regexp_delimiter(c)) {
338
- advance(2);
339
- return consume_double_quoted_string(c, c);
340
- } else {
341
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
342
- }
343
- }
344
- }
343
+ return consume_percent_string(&Lexer::consume_interpolated_string);
345
344
  case 'r':
346
- switch (peek()) {
347
- case '[':
348
- advance(2);
349
- return consume_regexp('[', ']');
350
- case '{':
351
- advance(2);
352
- return consume_regexp('{', '}');
353
- case '(':
354
- advance(2);
355
- return consume_regexp('(', ')');
356
- case '<':
357
- advance(2);
358
- return consume_regexp('<', '>');
359
- default: {
360
- char c = peek();
361
- if (char_can_be_string_or_regexp_delimiter(c)) {
362
- advance(2);
363
- return consume_regexp(c, c);
364
- } else {
365
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
366
- }
367
- }
368
- }
345
+ return consume_percent_string(&Lexer::consume_regexp);
346
+ case 's':
347
+ return consume_percent_string(&Lexer::consume_percent_symbol);
369
348
  case 'x':
370
- switch (peek()) {
371
- case '/': {
372
- advance(2);
373
- return consume_double_quoted_string('/', '/', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
374
- }
375
- case '[': {
376
- advance(2);
377
- return consume_double_quoted_string('[', ']', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
378
- }
379
- case '{': {
380
- advance(2);
381
- return consume_double_quoted_string('{', '}', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
382
- }
383
- case '(': {
384
- advance(2);
385
- return consume_double_quoted_string('(', ')', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
386
- }
387
- default:
388
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
389
- }
349
+ return consume_percent_string(&Lexer::consume_interpolated_shell);
390
350
  case 'w':
391
- switch (peek()) {
392
- case '/':
393
- case '|': {
394
- char c = next();
395
- advance();
396
- return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerW);
397
- }
398
- case '[':
399
- advance(2);
400
- return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerW);
401
- case '{':
402
- advance(2);
403
- return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerW);
404
- case '<':
405
- advance(2);
406
- return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerW);
407
- case '(':
408
- advance(2);
409
- return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerW);
410
- default:
411
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
412
- }
351
+ return consume_percent_string(&Lexer::consume_percent_lower_w);
413
352
  case 'W':
414
- switch (peek()) {
415
- case '/':
416
- case '|': {
417
- char c = next();
418
- advance();
419
- return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperW);
420
- }
421
- case '[':
422
- advance(2);
423
- return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperW);
424
- case '{':
425
- advance(2);
426
- return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperW);
427
- case '<':
428
- advance(2);
429
- return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperW);
430
- case '(':
431
- advance(2);
432
- return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperW);
433
- default:
434
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
435
- }
353
+ return consume_percent_string(&Lexer::consume_percent_upper_w);
436
354
  case 'i':
437
- switch (peek()) {
438
- case '|':
439
- case '/': {
440
- char c = next();
441
- advance();
442
- return consume_quoted_array_without_interpolation(c, c, Token::Type::PercentLowerI);
443
- }
444
- case '[':
445
- advance(2);
446
- return consume_quoted_array_without_interpolation('[', ']', Token::Type::PercentLowerI);
447
- case '{':
448
- advance(2);
449
- return consume_quoted_array_without_interpolation('{', '}', Token::Type::PercentLowerI);
450
- case '<':
451
- advance(2);
452
- return consume_quoted_array_without_interpolation('<', '>', Token::Type::PercentLowerI);
453
- case '(':
454
- advance(2);
455
- return consume_quoted_array_without_interpolation('(', ')', Token::Type::PercentLowerI);
456
- default:
457
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
458
- }
355
+ return consume_percent_string(&Lexer::consume_percent_lower_i);
459
356
  case 'I':
460
- switch (peek()) {
461
- case '|':
462
- case '/': {
463
- char c = next();
464
- advance();
465
- return consume_quoted_array_with_interpolation(0, c, Token::Type::PercentUpperI);
466
- }
467
- case '[':
468
- advance(2);
469
- return consume_quoted_array_with_interpolation('[', ']', Token::Type::PercentUpperI);
470
- case '{':
471
- advance(2);
472
- return consume_quoted_array_with_interpolation('{', '}', Token::Type::PercentUpperI);
473
- case '<':
474
- advance(2);
475
- return consume_quoted_array_with_interpolation('<', '>', Token::Type::PercentUpperI);
476
- case '(':
477
- advance(2);
478
- return consume_quoted_array_with_interpolation('(', ')', Token::Type::PercentUpperI);
479
- default:
480
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
481
- }
482
- case '[':
483
- advance();
484
- return consume_double_quoted_string('[', ']');
485
- case '{':
486
- advance();
487
- return consume_double_quoted_string('{', '}');
488
- case '<':
489
- advance();
490
- return consume_double_quoted_string('<', '>');
491
- case '(':
492
- if (m_last_token.type() == Token::Type::DefKeyword || m_last_token.type() == Token::Type::Dot) {
493
- // It's a trap! This looks like a %(string) but it's a method def/call!
494
- break;
495
- }
496
- advance();
497
- return consume_double_quoted_string('(', ')');
498
- default: {
499
- auto c = current_char();
500
- if (char_can_be_string_or_regexp_delimiter(c)) {
501
- advance();
502
- return consume_double_quoted_string(c, c);
503
- }
504
- break;
505
- }
357
+ return consume_percent_string(&Lexer::consume_percent_upper_i);
358
+ default:
359
+ return consume_percent_string(&Lexer::consume_interpolated_string, false);
506
360
  }
507
- return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
508
361
  case '!':
509
362
  advance();
510
363
  switch (current_char()) {
@@ -515,10 +368,10 @@ Token Lexer::build_next_token() {
515
368
  advance();
516
369
  return Token { Token::Type::NotMatch, m_file, m_token_line, m_token_column, m_whitespace_precedes };
517
370
  case '@':
518
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
371
+ if (m_remaining_method_names > 0) {
519
372
  advance();
520
373
  SharedPtr<String> lit = new String("!@");
521
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
374
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
522
375
  } else {
523
376
  return Token { Token::Type::Not, m_file, m_token_line, m_token_column, m_whitespace_precedes };
524
377
  }
@@ -653,10 +506,10 @@ Token Lexer::build_next_token() {
653
506
  advance();
654
507
  switch (current_char()) {
655
508
  case '@':
656
- if (m_last_token.is_def_keyword() || m_last_token.is_dot()) {
509
+ if (m_remaining_method_names > 0) {
657
510
  advance();
658
511
  SharedPtr<String> lit = new String("~@");
659
- return Token { Token::Type::BareName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
512
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
660
513
  } else {
661
514
  return Token { Token::Type::Tilde, m_file, m_token_line, m_token_column, m_whitespace_precedes };
662
515
  }
@@ -665,7 +518,7 @@ Token Lexer::build_next_token() {
665
518
  }
666
519
  case '?': {
667
520
  auto c = next();
668
- if (isspace(c)) {
521
+ if (isspace(c) || c == 0) {
669
522
  m_open_ternary = true;
670
523
  return Token { Token::Type::TernaryQuestion, m_file, m_token_line, m_token_column, m_whitespace_precedes };
671
524
  } else {
@@ -695,7 +548,7 @@ Token Lexer::build_next_token() {
695
548
  advance();
696
549
  auto string = consume_single_quoted_string('\'', '\'');
697
550
  return Token { Token::Type::Symbol, string.literal(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
698
- } else if (isspace(c)) {
551
+ } else if (isspace(c) || c == 0) {
699
552
  m_open_ternary = false;
700
553
  auto token = Token { Token::Type::TernaryColon, m_file, m_token_line, m_token_column, m_whitespace_precedes };
701
554
  return token;
@@ -793,13 +646,18 @@ Token Lexer::build_next_token() {
793
646
  return Token { Token::Type::Comma, m_file, m_token_line, m_token_column, m_whitespace_precedes };
794
647
  case '"':
795
648
  advance();
796
- return consume_double_quoted_string('"', '"');
649
+ return consume_interpolated_string('"', '"');
797
650
  case '\'':
798
651
  advance();
799
652
  return consume_single_quoted_string('\'', '\'');
800
653
  case '`': {
801
654
  advance();
802
- return consume_double_quoted_string('`', '`', Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
655
+ if (m_remaining_method_names > 0) {
656
+ SharedPtr<String> lit = new String("`");
657
+ return Token { Token::Type::OperatorName, lit, m_file, m_token_line, m_token_column, m_whitespace_precedes };
658
+ } else {
659
+ return consume_interpolated_shell('`', '`');
660
+ }
803
661
  }
804
662
  case '#':
805
663
  if (token_is_first_on_line()) {
@@ -862,14 +720,14 @@ Token Lexer::build_next_token() {
862
720
 
863
721
  Token keyword_token;
864
722
 
865
- if (!m_last_token.is_dot() && match(4, "self")) {
866
- if (current_char() == '.')
723
+ if (!m_last_token.is_dot() && !m_last_token.is_constant_resolution() && match(4, "self")) {
724
+ if (current_char() == '.' || (current_char() == ':' && peek() == ':'))
867
725
  keyword_token = { Token::Type::SelfKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
868
726
  else
869
727
  rewind(4);
870
728
  }
871
729
 
872
- if (!m_last_token.is_dot() && !m_last_token.is_def_keyword()) {
730
+ if (m_remaining_method_names == 0) {
873
731
  if (match(12, "__ENCODING__"))
874
732
  keyword_token = { Token::Type::ENCODINGKeyword, m_file, m_token_line, m_token_column, m_whitespace_precedes };
875
733
  else if (match(8, "__LINE__"))
@@ -964,10 +822,10 @@ Token Lexer::build_next_token() {
964
822
  }
965
823
 
966
824
  auto c = current_char();
967
- if ((c >= 'a' && c <= 'z') || c == '_') {
968
- return consume_bare_name();
825
+ if (is_name_start_char(c)) {
826
+ return consume_bare_name_or_constant(Token::Type::BareName);
969
827
  } else if (c >= 'A' && c <= 'Z') {
970
- return consume_constant();
828
+ return consume_bare_name_or_constant(Token::Type::Constant);
971
829
  } else {
972
830
  auto buf = consume_non_whitespace();
973
831
  auto token = Token { Token::Type::Invalid, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
@@ -1097,45 +955,47 @@ Token Lexer::consume_symbol() {
1097
955
  return Token { Token::Type::Symbol, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1098
956
  }
1099
957
 
1100
- Token Lexer::consume_word(Token::Type type) {
958
+ SharedPtr<String> Lexer::consume_word() {
1101
959
  char c = current_char();
1102
960
  SharedPtr<String> buf = new String("");
1103
961
  do {
1104
962
  buf->append_char(c);
1105
963
  c = next();
1106
964
  } while (is_identifier_char(c));
965
+ return buf;
966
+ }
967
+
968
+ Token Lexer::consume_word(Token::Type type) {
969
+ return Token { type, consume_word(), m_file, m_token_line, m_token_column, m_whitespace_precedes };
970
+ }
971
+
972
+ Token Lexer::consume_bare_name_or_constant(Token::Type type) {
973
+ auto buf = consume_word();
974
+ auto c = current_char();
1107
975
  switch (c) {
1108
976
  case '?':
1109
977
  case '!':
1110
978
  advance();
1111
979
  buf->append_char(c);
1112
980
  break;
981
+ case '=':
982
+ if (m_allow_assignment_method || (!m_last_token.is_dot() && m_remaining_method_names > 0)) {
983
+ advance();
984
+ buf->append_char(c);
985
+ }
986
+ break;
987
+ case ':':
988
+ if (peek() != ':' && m_last_token.can_precede_symbol_key()) {
989
+ advance();
990
+ type = Token::Type::SymbolKey;
991
+ }
992
+ break;
1113
993
  default:
1114
994
  break;
1115
995
  }
1116
996
  return Token { type, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1117
997
  }
1118
998
 
1119
- Token Lexer::consume_bare_name() {
1120
- auto token = consume_word(Token::Type::BareName);
1121
- auto c = current_char();
1122
- if (c == ':' && peek() != ':' && m_last_token.can_precede_symbol_key()) {
1123
- advance();
1124
- token.set_type(Token::Type::SymbolKey);
1125
- }
1126
- return token;
1127
- }
1128
-
1129
- Token Lexer::consume_constant() {
1130
- auto token = consume_word(Token::Type::Constant);
1131
- auto c = current_char();
1132
- if (c == ':' && peek() != ':' && m_last_token.can_precede_symbol_key()) {
1133
- advance();
1134
- token.set_type(Token::Type::SymbolKey);
1135
- }
1136
- return token;
1137
- }
1138
-
1139
999
  Token Lexer::consume_global_variable() {
1140
1000
  switch (peek()) {
1141
1001
  case '?':
@@ -1157,7 +1017,6 @@ Token Lexer::consume_global_variable() {
1157
1017
  case '.':
1158
1018
  case ',':
1159
1019
  case ':':
1160
- case '_':
1161
1020
  case '~': {
1162
1021
  advance();
1163
1022
  SharedPtr<String> buf = new String("$");
@@ -1281,7 +1140,7 @@ Token Lexer::consume_heredoc() {
1281
1140
  }
1282
1141
  advance();
1283
1142
  } else {
1284
- heredoc_name = String(consume_word(Token::Type::BareName).literal());
1143
+ heredoc_name = *consume_word();
1285
1144
  }
1286
1145
 
1287
1146
  SharedPtr<String> doc = new String("");
@@ -1677,7 +1536,7 @@ Token Lexer::consume_single_quoted_string(char start_char, char stop_char) {
1677
1536
  SharedPtr<String> buf = new String("");
1678
1537
  char c = current_char();
1679
1538
  while (c) {
1680
- if (c == '\\') {
1539
+ if (c == '\\' && stop_char != '\\') {
1681
1540
  c = next();
1682
1541
  if (c == stop_char || c == '\\') {
1683
1542
  buf->append_char(c);
@@ -1724,6 +1583,65 @@ Token Lexer::consume_regexp(char start_char, char stop_char) {
1724
1583
  return Token { Token::Type::InterpolatedRegexpBegin, start_char, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1725
1584
  }
1726
1585
 
1586
+ Token Lexer::consume_percent_symbol(char start_char, char stop_char) {
1587
+ Token token = consume_single_quoted_string(start_char, stop_char);
1588
+ token.set_type(Token::Type::Symbol);
1589
+ return token;
1590
+ }
1591
+
1592
+ Token Lexer::consume_interpolated_string(char start_char, char stop_char) {
1593
+ return consume_double_quoted_string(start_char, stop_char, Token::Type::InterpolatedStringBegin, Token::Type::InterpolatedStringEnd);
1594
+ }
1595
+
1596
+ Token Lexer::consume_interpolated_shell(char start_char, char stop_char) {
1597
+ return consume_double_quoted_string(start_char, stop_char, Token::Type::InterpolatedShellBegin, Token::Type::InterpolatedShellEnd);
1598
+ }
1599
+
1600
+ Token Lexer::consume_percent_lower_w(char start_char, char stop_char) {
1601
+ return consume_quoted_array_without_interpolation(start_char, stop_char, Token::Type::PercentLowerW);
1602
+ }
1603
+
1604
+ Token Lexer::consume_percent_upper_w(char start_char, char stop_char) {
1605
+ return consume_quoted_array_with_interpolation(start_char, stop_char, Token::Type::PercentUpperW);
1606
+ }
1607
+
1608
+ Token Lexer::consume_percent_lower_i(char start_char, char stop_char) {
1609
+ return consume_quoted_array_without_interpolation(start_char, stop_char, Token::Type::PercentLowerI);
1610
+ }
1611
+
1612
+ Token Lexer::consume_percent_upper_i(char start_char, char stop_char) {
1613
+ return consume_quoted_array_with_interpolation(start_char, stop_char, Token::Type::PercentUpperI);
1614
+ }
1615
+
1616
+ Token Lexer::consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered) {
1617
+ if (m_remaining_method_names > 0) {
1618
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1619
+ }
1620
+ char c = is_lettered ? peek() : current_char();
1621
+ size_t bytes = is_lettered ? 2 : 1;
1622
+ switch (c) {
1623
+ case '[':
1624
+ advance(bytes);
1625
+ return (this->*consumer)('[', ']');
1626
+ case '{':
1627
+ advance(bytes);
1628
+ return (this->*consumer)('{', '}');
1629
+ case '<':
1630
+ advance(bytes);
1631
+ return (this->*consumer)('<', '>');
1632
+ case '(':
1633
+ advance(bytes);
1634
+ return (this->*consumer)('(', ')');
1635
+ default:
1636
+ if (char_can_be_string_or_regexp_delimiter(c)) {
1637
+ advance(bytes);
1638
+ return (this->*consumer)(c, c);
1639
+ } else {
1640
+ return Token { Token::Type::Percent, m_file, m_token_line, m_token_column, m_whitespace_precedes };
1641
+ }
1642
+ }
1643
+ }
1644
+
1727
1645
  SharedPtr<String> Lexer::consume_non_whitespace() {
1728
1646
  char c = current_char();
1729
1647
  SharedPtr<String> buf = new String("");
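
The percent-literal handling consolidated into `consume_percent_string` above dispatches on the letter after `%` and on the opening delimiter. For orientation, these are the standard Ruby forms it routes (ordinary Ruby literals, not code from this gem):

```ruby
%q(no interpolation)    # => "no interpolation"
%Q[two is #{1 + 1}]     # => "two is 2"
%w{a b c}               # => ["a", "b", "c"]
%i<x y>                 # => [:x, :y]
%r(\d+)                 # a regexp literal
%s(hello)               # => :hello, handled by the new consume_percent_symbol
%x(echo hi)             # a shell command string
```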
data/src/parser.cpp CHANGED
@@ -46,7 +46,7 @@ enum class Parser::Precedence {
46
46
  REF, // foo[1] / foo[1] = 2
47
47
  };
48
48
 
49
- bool Parser::higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence) {
49
+ bool Parser::higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow) {
50
50
  auto next_precedence = get_precedence(token, left);
51
51
 
52
52
  // printf("token %d, left %d, current_precedence %d, next_precedence %d\n", (int)token.type(), (int)left->type(), (int)current_precedence, (int)next_precedence);
@@ -83,11 +83,11 @@ bool Parser::higher_precedence(Token &token, SharedPtr<Node> left, Precedence cu
83
83
  // NOTE: `m_call_depth` should probably be called
84
84
  // `m_call_that_can_accept_a_block_depth`, but that's a bit long.
85
85
  //
86
- return m_call_depth.last() == 0;
86
+ return iter_allow == IterAllow::CURLY_AND_BLOCK && m_call_depth.last() == 0;
87
87
  }
88
88
 
89
89
  if (next_precedence == Precedence::ITER_CURLY)
90
- return left->is_callable();
90
+ return iter_allow >= IterAllow::CURLY_ONLY && left->is_callable();
91
91
 
92
92
  return next_precedence > current_precedence;
93
93
  }
@@ -198,7 +198,7 @@ Parser::Precedence Parser::get_precedence(Token &token, SharedPtr<Node> left) {
198
198
  return Precedence::LOWEST;
199
199
  }
200
200
 
201
- SharedPtr<Node> Parser::parse_expression(Parser::Precedence precedence, LocalsHashmap &locals) {
201
+ SharedPtr<Node> Parser::parse_expression(Parser::Precedence precedence, LocalsHashmap &locals, IterAllow iter_allow) {
202
202
  skip_newlines();
203
203
 
204
204
  m_precedence_stack.push(precedence);
@@ -211,7 +211,7 @@ SharedPtr<Node> Parser::parse_expression(Parser::Precedence precedence, LocalsHa
211
211
 
212
212
  while (current_token().is_valid()) {
213
213
  auto token = current_token();
214
- if (!higher_precedence(token, left, precedence))
214
+ if (!higher_precedence(token, left, precedence, iter_allow))
215
215
  break;
216
216
  auto left_fn = left_denotation(token, left, precedence);
217
217
  if (!left_fn)
@@ -278,44 +278,47 @@ SharedPtr<BlockNode> Parser::parse_def_body(LocalsHashmap &locals) {
278
278
  return parse_body(locals, Precedence::LOWEST, Token::Type::EndKeyword, true);
279
279
  }
280
280
 
281
+ void Parser::reinsert_collapsed_newline() {
282
+ auto token = previous_token();
283
+ if (token.can_precede_collapsible_newline()) {
284
+ // Some operators at the end of a line cause the newlines to be collapsed:
285
+ //
286
+ // foo <<
287
+ // bar
288
+ //
289
+ // ...but in this case (an alias), collapsing the newline was a mistake:
290
+ //
291
+ // alias foo <<
292
+ // def bar; end
293
+ //
294
+ // So, we'll put the newline back.
295
+ m_tokens->insert(m_index, Token { Token::Type::Newline, token.file(), token.line(), token.column(), token.whitespace_precedes() });
296
+ }
297
+ }
298
+
281
299
  SharedPtr<Node> Parser::parse_alias(LocalsHashmap &locals) {
282
300
  auto token = current_token();
283
301
  advance();
284
- SharedPtr<SymbolNode> new_name = parse_alias_arg(locals, "alias new name (first argument)", false);
285
- auto existing_name = parse_alias_arg(locals, "alias existing name (second argument)", true);
302
+ auto new_name = parse_alias_arg(locals, "alias new name (first argument)");
303
+ auto existing_name = parse_alias_arg(locals, "alias existing name (second argument)");
304
+ reinsert_collapsed_newline();
286
305
  return new AliasNode { token, new_name, existing_name };
287
306
  }
288
307
 
289
- SharedPtr<SymbolNode> Parser::parse_alias_arg(LocalsHashmap &locals, const char *expected_message, bool reinsert_collapsed_newline) {
308
+ SharedPtr<SymbolNode> Parser::parse_alias_arg(LocalsHashmap &locals, const char *expected_message) {
290
309
  auto token = current_token();
291
310
  switch (token.type()) {
292
- // TODO: handle Constant too
293
- case Token::Type::BareName: {
294
- advance();
295
- return new SymbolNode { token, token.literal_string() };
296
- }
311
+ case Token::Type::BareName:
312
+ case Token::Type::Constant:
313
+ case Token::Type::OperatorName:
314
+ return new SymbolNode { token, parse_method_name(locals) };
297
315
  case Token::Type::Symbol:
298
316
  return parse_symbol(locals).static_cast_as<SymbolNode>();
299
317
  case Token::Type::InterpolatedSymbolBegin:
300
318
  return parse_interpolated_symbol(locals).static_cast_as<SymbolNode>();
301
319
  default:
302
320
  if (token.is_operator() || token.is_keyword()) {
303
- advance();
304
- if (token.can_precede_collapsible_newline() && reinsert_collapsed_newline) {
305
- // Some operators at the end of a line cause the newlines to be collapsed:
306
- //
307
- // foo <<
308
- // bar
309
- //
310
- // ...but in this case (an alias), collapsing the newline was a mistake:
311
- //
312
- // alias foo <<
313
- // def bar; end
314
- //
315
- // So, we'll put the newline back.
316
- m_tokens->insert(m_index, Token { Token::Type::Newline, token.file(), token.line(), token.column(), token.whitespace_precedes() });
317
- }
318
- return new SymbolNode { token, new String(token.type_value()) };
321
+ return new SymbolNode { token, parse_method_name(locals) };
319
322
  } else {
320
323
  throw_unexpected(expected_message);
321
324
  }
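
Since `parse_alias_arg` now delegates to `parse_method_name`, `alias` (and, further down, `undef`) accept bare names, constants, operator names, and keyword-like names uniformly. A hypothetical example of source this unlocks:

```ruby
class List
  def push(item)
    (@items ||= []) << item
  end

  alias << push     # an operator as the new alias name
  alias append push
  undef append      # undef takes the same method-name forms as alias
end
```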
@@ -501,7 +504,7 @@ SharedPtr<Node> Parser::parse_beginless_range(LocalsHashmap &locals) {
501
504
  SharedPtr<Node> Parser::parse_block_pass(LocalsHashmap &locals) {
502
505
  auto token = current_token();
503
506
  advance();
504
- auto value = parse_expression(Precedence::UNARY_PLUS, locals);
507
+ auto value = parse_expression(Precedence::LOWEST, locals);
505
508
  return new BlockPassNode { token, value };
506
509
  }
507
510
 
@@ -865,15 +868,19 @@ SharedPtr<BlockNode> Parser::parse_case_when_body(LocalsHashmap &locals) {
865
868
  }
866
869
 
867
870
  SharedPtr<Node> Parser::parse_class_or_module_name(LocalsHashmap &locals) {
868
- Token name_token;
869
- if (current_token().type() == Token::Type::ConstantResolution) {
870
- name_token = peek_token();
871
- } else {
872
- name_token = current_token();
873
- }
874
- if (name_token.type() != Token::Type::Constant)
871
+ auto name_token = current_token();
872
+ auto exp = parse_expression(Precedence::LESS_GREATER, locals);
873
+ switch (exp->type()) {
874
+ case Node::Type::Colon2:
875
+ case Node::Type::Colon3:
876
+ return exp;
877
+ case Node::Type::Identifier:
878
+ if (name_token.type() == Token::Type::Constant)
879
+ return exp;
880
+ [[fallthrough]];
881
+ default:
875
882
  throw SyntaxError { "class/module name must be CONSTANT" };
876
- return parse_expression(Precedence::LESS_GREATER, locals);
883
+ }
877
884
  }
878
885
 
879
886
  SharedPtr<Node> Parser::parse_class(LocalsHashmap &locals) {
@@ -991,21 +998,24 @@ SharedPtr<Node> Parser::parse_def(LocalsHashmap &locals) {
991
998
  auto token = current_token();
992
999
  switch (token.type()) {
993
1000
  case Token::Type::BareName:
994
- if (peek_token().type() == Token::Type::Dot) {
1001
+ if (peek_token().is_dot() || peek_token().is_constant_resolution()) {
995
1002
  self_node = parse_identifier(locals);
996
1003
  advance(); // dot
997
1004
  }
998
1005
  name = parse_method_name(locals);
999
1006
  break;
1000
1007
  case Token::Type::Constant:
1001
- if (peek_token().type() == Token::Type::Dot) {
1008
+ if (peek_token().is_dot() || peek_token().is_constant_resolution()) {
1002
1009
  self_node = parse_constant(locals);
1003
1010
  advance(); // dot
1004
1011
  }
1005
1012
  name = parse_method_name(locals);
1006
1013
  break;
1014
+ case Token::Type::OperatorName:
1015
+ name = parse_method_name(locals);
1016
+ break;
1007
1017
  case Token::Type::SelfKeyword:
1008
- if (peek_token().type() == Token::Type::Dot) {
1018
+ if (peek_token().is_dot() || peek_token().is_constant_resolution()) {
1009
1019
  self_node = new SelfNode { current_token() };
1010
1020
  advance(); // self
1011
1021
  advance(); // dot
@@ -1023,10 +1033,6 @@ SharedPtr<Node> Parser::parse_def(LocalsHashmap &locals) {
1023
1033
  }
1024
1034
  }
1025
1035
  }
1026
- if (current_token().is_equal() && !current_token().whitespace_precedes()) {
1027
- advance();
1028
- name->append_char('=');
1029
- }
1030
1036
  auto args = Vector<SharedPtr<Node>> {};
1031
1037
  if (current_token().is_lparen()) {
1032
1038
  advance();
@@ -1037,7 +1043,7 @@ SharedPtr<Node> Parser::parse_def(LocalsHashmap &locals) {
1037
1043
  expect(Token::Type::RParen, "args closing paren");
1038
1044
  advance();
1039
1045
  }
1040
- } else if (current_token().is_bare_name() || current_token().is_splat() || current_token().is_symbol_key()) {
1046
+ } else if (current_token().can_be_first_arg_of_def()) {
1041
1047
  parse_def_args(args, our_locals);
1042
1048
  }
1043
1049
  SharedPtr<BlockNode> body;
@@ -1085,7 +1091,21 @@ void Parser::parse_def_args(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals
1085
1091
  }
1086
1092
  }
1087
1093
 
1088
- void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals, ArgsContext context) {
1094
+ SharedPtr<Node> Parser::parse_arg_default_value(LocalsHashmap &locals, IterAllow iter_allow) {
1095
+ auto token = current_token();
1096
+ if (token.is_bare_name() && peek_token().is_equal()) {
1097
+ SharedPtr<ArgNode> arg = new ArgNode { token, token.literal_string() };
1098
+ advance();
1099
+ advance(); // =
1100
+ arg->add_to_locals(locals);
1101
+ arg->set_value(parse_arg_default_value(locals, iter_allow));
1102
+ return arg.static_cast_as<Node>();
1103
+ } else {
1104
+ return parse_expression(Precedence::DEF_ARG, locals, iter_allow);
1105
+ }
1106
+ }
1107
+
1108
+ void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals, ArgsContext context, IterAllow iter_allow) {
1089
1109
  auto args_have_any_splat = [&]() { return !args.is_empty() && args.last()->type() == Node::Type::Arg && args.last().static_cast_as<ArgNode>()->splat_or_kwsplat(); };
1090
1110
  auto args_have_keyword = [&]() { return !args.is_empty() && args.last()->type() == Node::Type::KeywordArg; };
1091
1111
 
@@ -1105,7 +1125,7 @@ void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &
1105
1125
  if (args_have_any_splat())
1106
1126
  throw_error(token, "default value after splat");
1107
1127
  advance(); // =
1108
- arg->set_value(parse_expression(Precedence::DEF_ARG, locals));
1128
+ arg->set_value(parse_arg_default_value(locals, iter_allow));
1109
1129
  }
1110
1130
  args.push(arg.static_cast_as<Node>());
1111
1131
  return;
@@ -1176,8 +1196,12 @@ void Parser::parse_def_single_arg(Vector<SharedPtr<Node>> &args, LocalsHashmap &
1176
1196
  case Token::Type::RParen:
1177
1197
  case Token::Type::Semicolon:
1178
1198
  break;
1199
+ case Token::Type::LCurlyBrace:
1200
+ if (iter_allow < IterAllow::CURLY_ONLY)
1201
+ break;
1202
+ [[fallthrough]];
1179
1203
  default:
1180
- arg->set_value(parse_expression(Precedence::DEF_ARG, locals));
1204
+ arg->set_value(parse_expression(Precedence::DEF_ARG, locals, iter_allow));
1181
1205
  }
1182
1206
  arg->add_to_locals(locals);
1183
1207
  args.push(arg.static_cast_as<Node>());
@@ -1263,6 +1287,12 @@ SharedPtr<Node> Parser::parse_file_constant(LocalsHashmap &) {
1263
1287
  return new StringNode { token, token.file() };
1264
1288
  }
1265
1289
 
1290
+ SharedPtr<Node> Parser::parse_line_constant(LocalsHashmap &) {
1291
+ auto token = current_token();
1292
+ advance();
1293
+ return new FixnumNode { token, static_cast<long long>(token.line() + 1) };
1294
+ }
1295
+
1266
1296
  SharedPtr<Node> Parser::parse_for(LocalsHashmap &locals) {
1267
1297
  auto token = current_token();
1268
1298
  advance();
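
`parse_line_constant` turns `__LINE__` into a `FixnumNode`; token lines are stored zero-based, hence the `+ 1`. In plain Ruby terms:

```ruby
puts __LINE__   # prints this statement's 1-based source line number
```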
@@ -1272,7 +1302,10 @@ SharedPtr<Node> Parser::parse_for(LocalsHashmap &locals) {
1272
1302
  }
1273
1303
  expect(Token::Type::InKeyword, "for in");
1274
1304
  advance();
1275
- auto expr = parse_expression(Precedence::LOWEST, locals);
1305
+ auto expr = parse_expression(Precedence::LOWEST, locals, IterAllow::CURLY_ONLY);
1306
+ if (current_token().type() == Token::Type::DoKeyword) {
1307
+ advance();
1308
+ }
1276
1309
  auto body = parse_body(locals, Precedence::LOWEST);
1277
1310
  expect(Token::Type::EndKeyword, "for end");
1278
1311
  advance();
@@ -1690,13 +1723,15 @@ SharedPtr<String> Parser::parse_method_name(LocalsHashmap &) {
1690
1723
  switch (token.type()) {
1691
1724
  case Token::Type::BareName:
1692
1725
  case Token::Type::Constant:
1726
+ case Token::Type::OperatorName:
1693
1727
  name = current_token().literal_string();
1694
1728
  break;
1695
1729
  default:
1696
- if (token.is_operator() || token.is_keyword())
1730
+ if (token.is_operator() || token.is_keyword()) {
1697
1731
  name = new String(current_token().type_value());
1698
- else
1732
+ } else {
1699
1733
  throw_unexpected("method name");
1734
+ }
1700
1735
  }
1701
1736
  advance();
1702
1737
  return name;
@@ -1766,15 +1801,15 @@ SharedPtr<Node> Parser::parse_nth_ref(LocalsHashmap &) {
1766
1801
  return new NthRefNode { token, token.get_fixnum() };
1767
1802
  }
1768
1803
 
1769
- void Parser::parse_proc_args(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals) {
1804
+ void Parser::parse_proc_args(Vector<SharedPtr<Node>> &args, LocalsHashmap &locals, IterAllow iter_allow) {
1770
1805
  if (current_token().is_semicolon()) {
1771
1806
  parse_shadow_variables_in_args(args, locals);
1772
1807
  return;
1773
1808
  }
1774
- parse_def_single_arg(args, locals, ArgsContext::Proc);
1809
+ parse_def_single_arg(args, locals, ArgsContext::Proc, iter_allow);
1775
1810
  while (current_token().is_comma()) {
1776
1811
  advance();
1777
- parse_def_single_arg(args, locals, ArgsContext::Proc);
1812
+ parse_def_single_arg(args, locals, ArgsContext::Proc, iter_allow);
1778
1813
  }
1779
1814
  if (current_token().is_semicolon()) {
1780
1815
  parse_shadow_variables_in_args(args, locals);
@@ -1887,13 +1922,13 @@ SharedPtr<Node> Parser::parse_stabby_proc(LocalsHashmap &locals) {
1887
1922
  if (current_token().is_rparen()) {
1888
1923
  advance(); // )
1889
1924
  } else {
1890
- parse_proc_args(args, locals);
1925
+ parse_proc_args(args, locals, IterAllow::CURLY_AND_BLOCK);
1891
1926
  expect(Token::Type::RParen, "proc args closing paren");
1892
1927
  advance(); // )
1893
1928
  }
1894
- } else if (current_token().is_bare_name() || current_token().type() == Token::Type::Star) {
1929
+ } else if (current_token().can_be_first_arg_of_def()) {
1895
1930
  has_args = true;
1896
- parse_proc_args(args, locals);
1931
+ parse_proc_args(args, locals, IterAllow::NONE);
1897
1932
  }
1898
1933
  if (current_token().type() != Token::Type::DoKeyword && current_token().type() != Token::Type::LCurlyBrace)
1899
1934
  throw_unexpected("block");
@@ -2065,36 +2100,23 @@ SharedPtr<Node> Parser::parse_unary_operator(LocalsHashmap &locals) {
2065
2100
  SharedPtr<Node> Parser::parse_undef(LocalsHashmap &locals) {
2066
2101
  auto undef_token = current_token();
2067
2102
  advance();
2068
- auto symbol_from_token = [&](Token &token) -> SharedPtr<Node> {
2069
- switch (token.type()) {
2070
- case Token::Type::BareName:
2071
- case Token::Type::Constant:
2072
- advance();
2073
- return new SymbolNode { token, token.literal_string() };
2074
- case Token::Type::Symbol:
2075
- return parse_symbol(locals);
2076
- case Token::Type::InterpolatedSymbolBegin: {
2077
- return parse_interpolated_symbol(locals);
2078
- }
2079
- default:
2080
- throw_unexpected("method name for undef");
2081
- }
2082
- };
2083
2103
  SharedPtr<UndefNode> undef_node = new UndefNode { undef_token };
2084
- auto token = current_token();
2085
- undef_node->add_arg(symbol_from_token(token));
2104
+ auto arg = parse_alias_arg(locals, "method name for undef");
2105
+ undef_node->add_arg(arg.static_cast_as<Node>());
2086
2106
  if (current_token().is_comma()) {
2087
2107
  SharedPtr<BlockNode> block = new BlockNode { undef_token };
2088
2108
  block->add_node(undef_node.static_cast_as<Node>());
2089
2109
  while (current_token().is_comma()) {
2090
2110
  advance();
2091
- token = current_token();
2092
2111
  SharedPtr<UndefNode> undef_node = new UndefNode { undef_token };
2093
- undef_node->add_arg(symbol_from_token(token));
2112
+ auto arg = parse_alias_arg(locals, "method name for undef");
2113
+ undef_node->add_arg(arg.static_cast_as<Node>());
2094
2114
  block->add_node(undef_node.static_cast_as<Node>());
2095
2115
  }
2116
+ reinsert_collapsed_newline();
2096
2117
  return block.static_cast_as<Node>();
2097
2118
  }
2119
+ reinsert_collapsed_newline();
2098
2120
  return undef_node.static_cast_as<Node>();
2099
2121
  };
2100
2122
 
@@ -2416,6 +2438,7 @@ SharedPtr<Node> Parser::parse_constant_resolution_expression(SharedPtr<Node> lef
2416
2438
  SharedPtr<Node> node;
2417
2439
  switch (name_token.type()) {
2418
2440
  case Token::Type::BareName:
2441
+ case Token::Type::OperatorName:
2419
2442
  advance();
2420
2443
  node = new CallNode { name_token, left, name_token.literal_string() };
2421
2444
  break;
@@ -2434,7 +2457,12 @@ SharedPtr<Node> Parser::parse_constant_resolution_expression(SharedPtr<Node> lef
2434
2457
  break;
2435
2458
  }
2436
2459
  default:
2437
- throw_unexpected(name_token, ":: identifier name");
2460
+ if (name_token.is_operator() || name_token.is_keyword()) {
2461
+ advance();
2462
+ node = new CallNode { name_token, left, new String(name_token.type_value()) };
2463
+ } else {
2464
+ throw_unexpected(name_token, ":: identifier name");
2465
+ }
2438
2466
  }
2439
2467
  return node;
2440
2468
  }
@@ -2677,6 +2705,7 @@ SharedPtr<Node> Parser::parse_safe_send_expression(SharedPtr<Node> left, LocalsH
2677
2705
  break;
2678
2706
  case Token::Type::BareName:
2679
2707
  case Token::Type::Constant:
2708
+ case Token::Type::OperatorName:
2680
2709
  name = name_token.literal_string();
2681
2710
  advance();
2682
2711
  break;
@@ -2703,6 +2732,7 @@ SharedPtr<Node> Parser::parse_send_expression(SharedPtr<Node> left, LocalsHashma
2703
2732
  switch (name_token.type()) {
2704
2733
  case Token::Type::BareName:
2705
2734
  case Token::Type::Constant:
2735
+ case Token::Type::OperatorName:
2706
2736
  name = name_token.literal_string();
2707
2737
  advance();
2708
2738
  break;
@@ -2745,7 +2775,11 @@ SharedPtr<Node> Parser::parse_unless(LocalsHashmap &locals) {
2745
2775
  if (condition->type() == Node::Type::Regexp) {
2746
2776
  condition = new MatchNode { condition->token(), condition.static_cast_as<RegexpNode>() };
2747
2777
  }
2748
- next_expression();
2778
+ if (current_token().type() == Token::Type::ThenKeyword) {
2779
+ advance(); // then
2780
+ } else {
2781
+ next_expression();
2782
+ }
2749
2783
  SharedPtr<Node> false_expr = parse_if_body(locals);
2750
2784
  SharedPtr<Node> true_expr;
2751
2785
  if (current_token().is_else_keyword()) {
@@ -2762,11 +2796,15 @@ SharedPtr<Node> Parser::parse_unless(LocalsHashmap &locals) {
2762
2796
  SharedPtr<Node> Parser::parse_while(LocalsHashmap &locals) {
2763
2797
  auto token = current_token();
2764
2798
  advance();
2765
- SharedPtr<Node> condition = parse_expression(Precedence::LOWEST, locals);
2799
+ SharedPtr<Node> condition = parse_expression(Precedence::LOWEST, locals, IterAllow::CURLY_ONLY);
2766
2800
  if (condition->type() == Node::Type::Regexp) {
2767
2801
  condition = new MatchNode { condition->token(), condition.static_cast_as<RegexpNode>() };
2768
2802
  }
2769
- next_expression();
2803
+ if (current_token().type() == Token::Type::DoKeyword) {
2804
+ advance();
2805
+ } else {
2806
+ next_expression();
2807
+ }
2770
2808
  SharedPtr<BlockNode> body = parse_body(locals, Precedence::LOWEST);
2771
2809
  expect(Token::Type::EndKeyword, "while end");
2772
2810
  advance();
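
`parse_unless` and `parse_while` now consume an explicit `then`/`do` before the body instead of requiring a newline or semicolon. A minimal example:

```ruby
i = 0
while i < 3 do   # the optional `do` after the condition now parses
  i += 1
end
```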
@@ -2824,6 +2862,8 @@ Parser::parse_null_fn Parser::null_denotation(Token::Type type) {
2824
2862
  return &Parser::parse_group;
2825
2863
  case Type::LCurlyBrace:
2826
2864
  return &Parser::parse_hash;
2865
+ case Type::LINEKeyword:
2866
+ return &Parser::parse_line_constant;
2827
2867
  case Type::BareName:
2828
2868
  case Type::ClassVariable:
2829
2869
  case Type::Constant:
@@ -2907,7 +2947,7 @@ Parser::parse_left_fn Parser::left_denotation(Token &token, SharedPtr<Node> left
2907
2947
  using Type = Token::Type;
2908
2948
  switch (token.type()) {
2909
2949
  case Type::Equal:
2910
- if (precedence == Precedence::ARRAY || precedence == Precedence::BARE_CALL_ARG || precedence == Precedence::CALL_ARG)
2950
+ if (precedence == Precedence::ARRAY || precedence == Precedence::HASH || precedence == Precedence::BARE_CALL_ARG || precedence == Precedence::CALL_ARG)
2911
2951
  return &Parser::parse_assignment_expression_without_multiple_values;
2912
2952
  else
2913
2953
  return &Parser::parse_assignment_expression;
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: natalie_parser
  version: !ruby/object:Gem::Version
- version: 2.1.0
+ version: 2.2.0
  platform: ruby
  authors:
  - Tim Morgan
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2022-08-12 00:00:00.000000000 Z
+ date: 2022-10-25 00:00:00.000000000 Z
  dependencies: []
  description: NatalieParser is a zero-dependency, from-scratch, hand-written recursive
  descent parser for the Ruby Programming Language.