natalie_parser 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +27 -0
  3. data/README.md +11 -4
  4. data/Rakefile +12 -5
  5. data/ext/natalie_parser/mri_creator.hpp +25 -7
  6. data/include/natalie_parser/creator/debug_creator.hpp +13 -3
  7. data/include/natalie_parser/creator.hpp +4 -2
  8. data/include/natalie_parser/node/array_pattern_node.hpp +20 -2
  9. data/include/natalie_parser/node/bignum_node.hpp +5 -1
  10. data/include/natalie_parser/node/case_in_node.hpp +5 -2
  11. data/include/natalie_parser/node/complex_node.hpp +49 -0
  12. data/include/natalie_parser/node/fixnum_node.hpp +5 -1
  13. data/include/natalie_parser/node/float_node.hpp +4 -0
  14. data/include/natalie_parser/node/forward_args_node.hpp +26 -0
  15. data/include/natalie_parser/node/hash_pattern_node.hpp +1 -0
  16. data/include/natalie_parser/node/infix_op_node.hpp +1 -1
  17. data/include/natalie_parser/node/iter_node.hpp +1 -1
  18. data/include/natalie_parser/node/keyword_rest_pattern_node.hpp +43 -0
  19. data/include/natalie_parser/node/node.hpp +7 -1
  20. data/include/natalie_parser/node/nth_ref_node.hpp +1 -1
  21. data/include/natalie_parser/node/rational_node.hpp +45 -0
  22. data/include/natalie_parser/node.hpp +4 -0
  23. data/include/natalie_parser/parser.hpp +14 -1
  24. data/include/natalie_parser/token.hpp +62 -13
  25. data/lib/natalie_parser/version.rb +1 -1
  26. data/src/lexer/interpolated_string_lexer.cpp +9 -9
  27. data/src/lexer/regexp_lexer.cpp +7 -7
  28. data/src/lexer/word_array_lexer.cpp +13 -13
  29. data/src/lexer.cpp +210 -181
  30. data/src/node/begin_rescue_node.cpp +1 -1
  31. data/src/node/interpolated_regexp_node.cpp +1 -1
  32. data/src/node/node.cpp +7 -0
  33. data/src/node/node_with_args.cpp +1 -0
  34. data/src/parser.cpp +261 -91
  35. metadata +6 -2
@@ -36,6 +36,7 @@ public:
36
36
  Comma,
37
37
  Comment,
38
38
  Comparison,
39
+ Complex,
39
40
  Constant,
40
41
  ConstantResolution,
41
42
  DefinedKeyword,
@@ -115,6 +116,8 @@ public:
115
116
  PipePipeEqual,
116
117
  Plus,
117
118
  PlusEqual,
119
+ Rational,
120
+ RationalComplex,
118
121
  RCurlyBrace,
119
122
  RBracket,
120
123
  RedoKeyword,
@@ -155,58 +158,64 @@ public:
155
158
 
156
159
  Token() { }
157
160
 
158
- Token(Type type, SharedPtr<String> file, size_t line, size_t column)
161
+ Token(Type type, SharedPtr<String> file, size_t line, size_t column, bool whitespace_precedes)
159
162
  : m_type { type }
160
163
  , m_file { file }
161
164
  , m_line { line }
162
- , m_column { column } {
165
+ , m_column { column }
166
+ , m_whitespace_precedes { whitespace_precedes } {
163
167
  assert(file);
164
168
  }
165
169
 
166
- Token(Type type, const char *literal, SharedPtr<String> file, size_t line, size_t column)
170
+ Token(Type type, const char *literal, SharedPtr<String> file, size_t line, size_t column, bool whitespace_precedes)
167
171
  : m_type { type }
168
172
  , m_literal { new String(literal) }
169
173
  , m_file { file }
170
174
  , m_line { line }
171
- , m_column { column } {
175
+ , m_column { column }
176
+ , m_whitespace_precedes { whitespace_precedes } {
172
177
  assert(literal);
173
178
  assert(file);
174
179
  }
175
180
 
176
- Token(Type type, SharedPtr<String> literal, SharedPtr<String> file, size_t line, size_t column)
181
+ Token(Type type, SharedPtr<String> literal, SharedPtr<String> file, size_t line, size_t column, bool whitespace_precedes)
177
182
  : m_type { type }
178
183
  , m_literal { literal }
179
184
  , m_file { file }
180
185
  , m_line { line }
181
- , m_column { column } {
186
+ , m_column { column }
187
+ , m_whitespace_precedes { whitespace_precedes } {
182
188
  assert(literal);
183
189
  assert(file);
184
190
  }
185
191
 
186
- Token(Type type, char literal, SharedPtr<String> file, size_t line, size_t column)
192
+ Token(Type type, char literal, SharedPtr<String> file, size_t line, size_t column, bool whitespace_precedes)
187
193
  : m_type { type }
188
194
  , m_literal { new String(literal) }
189
195
  , m_file { file }
190
196
  , m_line { line }
191
- , m_column { column } {
197
+ , m_column { column }
198
+ , m_whitespace_precedes { whitespace_precedes } {
192
199
  assert(file);
193
200
  }
194
201
 
195
- Token(Type type, long long fixnum, SharedPtr<String> file, size_t line, size_t column)
202
+ Token(Type type, long long fixnum, SharedPtr<String> file, size_t line, size_t column, bool whitespace_precedes)
196
203
  : m_type { type }
197
204
  , m_fixnum { fixnum }
198
205
  , m_file { file }
199
206
  , m_line { line }
200
- , m_column { column } {
207
+ , m_column { column }
208
+ , m_whitespace_precedes { whitespace_precedes } {
201
209
  assert(file);
202
210
  }
203
211
 
204
- Token(Type type, double dbl, SharedPtr<String> file, size_t line, size_t column)
212
+ Token(Type type, double dbl, SharedPtr<String> file, size_t line, size_t column, bool whitespace_precedes)
205
213
  : m_type { type }
206
214
  , m_double { dbl }
207
215
  , m_file { file }
208
216
  , m_line { line }
209
- , m_column { column } {
217
+ , m_column { column }
218
+ , m_whitespace_precedes { whitespace_precedes } {
210
219
  assert(file);
211
220
  }
212
221
 
@@ -286,6 +295,8 @@ public:
286
295
  return "comment";
287
296
  case Type::Comparison:
288
297
  return "<=>";
298
+ case Type::Complex:
299
+ return "complex";
289
300
  case Type::ConstantResolution:
290
301
  return "::";
291
302
  case Type::Constant:
@@ -446,6 +457,10 @@ public:
446
457
  return "+=";
447
458
  case Type::Plus:
448
459
  return "+";
460
+ case Type::Rational:
461
+ return "rational";
462
+ case Type::RationalComplex:
463
+ return "rational_complex";
449
464
  case Type::RCurlyBrace:
450
465
  return "}";
451
466
  case Type::RBracket:
@@ -627,6 +642,7 @@ public:
627
642
  bool is_elsif_keyword() const { return m_type == Type::ElsifKeyword; }
628
643
  bool is_end_keyword() const { return m_type == Type::EndKeyword; }
629
644
  bool is_end_of_expression() const { return m_type == Type::EndKeyword || m_type == Type::RCurlyBrace || m_type == Type::Newline || m_type == Type::Semicolon || m_type == Type::Eof || is_expression_modifier(); }
645
+ bool is_ensure() const { return m_type == Type::EnsureKeyword; }
630
646
  bool is_eof() const { return m_type == Type::Eof; }
631
647
  bool is_end_of_line() const { return m_type == Type::Newline || m_type == Type::Semicolon; }
632
648
  bool is_equal() const { return m_type == Type::Equal; }
@@ -635,6 +651,7 @@ public:
635
651
  bool is_lparen() const { return m_type == Type::LParen; }
636
652
  bool is_newline() const { return m_type == Type::Newline; }
637
653
  bool is_rbracket() const { return m_type == Type::RBracket; }
654
+ bool is_rescue() const { return m_type == Type::RescueKeyword; }
638
655
  bool is_rparen() const { return m_type == Type::RParen; }
639
656
  bool is_semicolon() const { return m_type == Type::Semicolon; }
640
657
  bool is_splat() const { return m_type == Type::Star || m_type == Type::StarStar; }
@@ -653,6 +670,7 @@ public:
653
670
  case Token::Type::RParen:
654
671
  case Token::Type::SafeNavigation:
655
672
  case Token::Type::TernaryColon:
673
+ case Token::Type::ThenKeyword:
656
674
  return true;
657
675
  default:
658
676
  return false;
@@ -671,7 +689,6 @@ public:
671
689
  case Token::Type::Comparison:
672
690
  case Token::Type::ConstantResolution:
673
691
  case Token::Type::Dot:
674
- case Token::Type::DotDot:
675
692
  case Token::Type::Equal:
676
693
  case Token::Type::EqualEqual:
677
694
  case Token::Type::EqualEqualEqual:
@@ -754,6 +771,7 @@ public:
754
771
  case Token::Type::LBracketRBracket:
755
772
  case Token::Type::LINEKeyword:
756
773
  case Token::Type::LParen:
774
+ case Token::Type::Minus:
757
775
  case Token::Type::NilKeyword:
758
776
  case Token::Type::Not:
759
777
  case Token::Type::NotKeyword:
@@ -761,6 +779,7 @@ public:
761
779
  case Token::Type::PercentLowerW:
762
780
  case Token::Type::PercentUpperI:
763
781
  case Token::Type::PercentUpperW:
782
+ case Token::Type::Plus:
764
783
  case Token::Type::SelfKeyword:
765
784
  case Token::Type::Star:
766
785
  case Token::Type::String:
@@ -804,6 +823,36 @@ public:
804
823
  }
805
824
  }
806
825
 
826
+ bool can_be_complex_or_rational() const {
827
+ switch (m_type) {
828
+ case Type::Bignum:
829
+ case Type::Fixnum:
830
+ case Type::Float:
831
+ return true;
832
+ default:
833
+ return false;
834
+ }
835
+ }
836
+
837
+ bool can_be_range_arg_token() const {
838
+ if (is_closing_token())
839
+ return false;
840
+ if (is_semicolon() || is_eof())
841
+ return false;
842
+ switch (m_type) {
843
+ case Type::ElseKeyword:
844
+ case Type::ElsifKeyword:
845
+ case Type::EndKeyword:
846
+ case Type::InKeyword:
847
+ case Type::ThenKeyword:
848
+ case Type::WhenKeyword:
849
+ // TODO: likely many more cases!
850
+ return false;
851
+ default:
852
+ return true;
853
+ }
854
+ }
855
+
807
856
  void set_literal(const char *literal) { m_literal = new String(literal); }
808
857
  void set_literal(SharedPtr<String> literal) { m_literal = literal; }
809
858
  void set_literal(String literal) { m_literal = new String(literal); }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class NatalieParser
4
- VERSION = '1.0.0'
4
+ VERSION = '1.2.0'
5
5
  end
@@ -14,7 +14,7 @@ Token InterpolatedStringLexer::build_next_token() {
14
14
  case State::EndToken:
15
15
  return finish();
16
16
  case State::Done:
17
- return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
17
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
18
18
  }
19
19
  TM_UNREACHABLE();
20
20
  }
@@ -26,13 +26,13 @@ Token InterpolatedStringLexer::consume_string() {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
29
- return Token { result.second, current_char(), m_file, m_cursor_line, m_cursor_column };
29
+ return Token { result.second, current_char(), m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
30
30
  } else if (c == '#' && peek() == '{') {
31
31
  if (buf->is_empty()) {
32
32
  advance(2);
33
33
  return start_evaluation();
34
34
  }
35
- auto token = Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
35
+ auto token = Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
36
36
  advance(2);
37
37
  m_state = State::EvaluateBegin;
38
38
  return token;
@@ -49,7 +49,7 @@ Token InterpolatedStringLexer::consume_string() {
49
49
  return finish();
50
50
  } else {
51
51
  m_state = State::EndToken;
52
- return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
52
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
53
53
  }
54
54
  } else {
55
55
  buf->append_char(c);
@@ -62,27 +62,27 @@ Token InterpolatedStringLexer::consume_string() {
62
62
  if (m_stop_char == 0) {
63
63
  advance();
64
64
  m_state = State::EndToken;
65
- return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
65
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
66
66
  }
67
67
 
68
- return Token { Token::Type::UnterminatedString, buf, m_file, m_token_line, m_token_column };
68
+ return Token { Token::Type::UnterminatedString, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
69
69
  }
70
70
 
71
71
  Token InterpolatedStringLexer::start_evaluation() {
72
72
  m_nested_lexer = new Lexer { *this, '{', '}' };
73
73
  m_state = State::EvaluateEnd;
74
- return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column };
74
+ return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column, m_whitespace_precedes };
75
75
  }
76
76
 
77
77
  Token InterpolatedStringLexer::stop_evaluation() {
78
78
  advance(); // }
79
79
  m_state = State::InProgress;
80
- return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column };
80
+ return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column, m_whitespace_precedes };
81
81
  }
82
82
 
83
83
  Token InterpolatedStringLexer::finish() {
84
84
  m_state = State::Done;
85
- return Token { m_end_type, m_file, m_cursor_line, m_cursor_column };
85
+ return Token { m_end_type, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
86
86
  }
87
87
 
88
88
  };
@@ -11,7 +11,7 @@ Token RegexpLexer::build_next_token() {
11
11
  m_nested_lexer = new Lexer { *this };
12
12
  m_nested_lexer->set_stop_char('}');
13
13
  m_state = State::EvaluateEnd;
14
- return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column };
14
+ return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column, m_whitespace_precedes };
15
15
  case State::EvaluateEnd:
16
16
  advance(); // }
17
17
  if (current_char() == m_stop_char) {
@@ -21,16 +21,16 @@ Token RegexpLexer::build_next_token() {
21
21
  } else {
22
22
  m_state = State::InProgress;
23
23
  }
24
- return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column };
24
+ return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column, m_whitespace_precedes };
25
25
  case State::EndToken: {
26
26
  m_state = State::Done;
27
- auto token = Token { Token::Type::InterpolatedRegexpEnd, m_file, m_cursor_line, m_cursor_column };
27
+ auto token = Token { Token::Type::InterpolatedRegexpEnd, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
28
28
  if (m_options && !m_options->is_empty())
29
29
  token.set_literal(m_options);
30
30
  return token;
31
31
  }
32
32
  case State::Done:
33
- return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
33
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
34
34
  }
35
35
  TM_UNREACHABLE();
36
36
  }
@@ -55,7 +55,7 @@ Token RegexpLexer::consume_regexp() {
55
55
  }
56
56
  advance();
57
57
  } else if (c == '#' && peek() == '{') {
58
- auto token = Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
58
+ auto token = Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
59
59
  buf = new String;
60
60
  advance(2);
61
61
  m_state = State::EvaluateBegin;
@@ -72,14 +72,14 @@ Token RegexpLexer::consume_regexp() {
72
72
  } else {
73
73
  m_options = consume_options();
74
74
  m_state = State::EndToken;
75
- return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
75
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
76
76
  }
77
77
  } else {
78
78
  buf->append_char(c);
79
79
  advance();
80
80
  }
81
81
  }
82
- return Token { Token::Type::UnterminatedRegexp, buf, m_file, m_token_line, m_token_column };
82
+ return Token { Token::Type::UnterminatedRegexp, buf, m_file, m_token_line, m_token_column, m_whitespace_precedes };
83
83
  }
84
84
 
85
85
  String *RegexpLexer::consume_options() {
@@ -11,7 +11,7 @@ Token WordArrayLexer::build_next_token() {
11
11
  return consume_array();
12
12
  case State::DynamicStringBegin:
13
13
  m_state = State::EvaluateBegin;
14
- return Token { Token::Type::String, m_buffer, m_file, m_token_line, m_token_column };
14
+ return Token { Token::Type::String, m_buffer, m_file, m_token_line, m_token_column, m_whitespace_precedes };
15
15
  case State::DynamicStringEnd:
16
16
  if (current_char() == m_stop_char) {
17
17
  advance();
@@ -19,18 +19,18 @@ Token WordArrayLexer::build_next_token() {
19
19
  } else {
20
20
  m_state = State::InProgress;
21
21
  }
22
- return Token { Token::Type::InterpolatedStringEnd, m_file, m_token_line, m_token_column };
22
+ return Token { Token::Type::InterpolatedStringEnd, m_file, m_token_line, m_token_column, m_whitespace_precedes };
23
23
  case State::EvaluateBegin:
24
24
  return start_evaluation();
25
25
  case State::EvaluateEnd:
26
26
  advance(); // }
27
27
  m_state = State::DynamicStringInProgress;
28
- return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column };
28
+ return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column, m_whitespace_precedes };
29
29
  case State::EndToken:
30
30
  m_state = State::Done;
31
- return Token { Token::Type::RBracket, m_file, m_cursor_line, m_cursor_column };
31
+ return Token { Token::Type::RBracket, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
32
32
  case State::Done:
33
- return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column };
33
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
34
34
  }
35
35
  TM_UNREACHABLE();
36
36
  }
@@ -70,7 +70,7 @@ Token WordArrayLexer::consume_array() {
70
70
  return dynamic_string_finish();
71
71
  }
72
72
  if (!m_buffer->is_empty()) {
73
- auto token = Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column };
73
+ auto token = Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
74
74
  advance();
75
75
  return token;
76
76
  }
@@ -97,38 +97,38 @@ Token WordArrayLexer::consume_array() {
97
97
  }
98
98
  }
99
99
 
100
- return Token { Token::Type::UnterminatedWordArray, m_buffer, m_file, m_token_line, m_token_column };
100
+ return Token { Token::Type::UnterminatedWordArray, m_buffer, m_file, m_token_line, m_token_column, m_whitespace_precedes };
101
101
  }
102
102
 
103
103
  Token WordArrayLexer::in_progress_start_dynamic_string() {
104
104
  advance(2); // #{
105
105
  m_state = State::DynamicStringBegin;
106
- return Token { Token::Type::InterpolatedStringBegin, m_file, m_cursor_line, m_cursor_column };
106
+ return Token { Token::Type::InterpolatedStringBegin, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
107
107
  }
108
108
 
109
109
  Token WordArrayLexer::start_evaluation() {
110
110
  m_nested_lexer = new Lexer { *this, '{', '}' };
111
111
  m_state = State::EvaluateEnd;
112
- return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column };
112
+ return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column, m_whitespace_precedes };
113
113
  }
114
114
 
115
115
  Token WordArrayLexer::dynamic_string_finish() {
116
116
  if (!m_buffer->is_empty()) {
117
117
  m_state = State::DynamicStringEnd;
118
- return Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column };
118
+ return Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
119
119
  }
120
120
  m_state = State::InProgress;
121
- return Token { Token::Type::InterpolatedStringEnd, m_file, m_token_line, m_token_column };
121
+ return Token { Token::Type::InterpolatedStringEnd, m_file, m_token_line, m_token_column, m_whitespace_precedes };
122
122
  }
123
123
 
124
124
  Token WordArrayLexer::in_progress_finish() {
125
125
  advance(); // ) or ] or } or whatever
126
126
  if (!m_buffer->is_empty()) {
127
127
  m_state = State::EndToken;
128
- return Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column };
128
+ return Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
129
129
  }
130
130
  m_state = State::Done;
131
- return Token { Token::Type::RBracket, m_file, m_cursor_line, m_cursor_column };
131
+ return Token { Token::Type::RBracket, m_file, m_cursor_line, m_cursor_column, m_whitespace_precedes };
132
132
  }
133
133
 
134
134
  };