natalie_parser 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -10,7 +10,7 @@ SharedPtr<Node> BeginRescueNode::name_to_node() const {
10
10
  token(),
11
11
  m_name.static_cast_as<Node>(),
12
12
  new IdentifierNode {
13
- Token { Token::Type::GlobalVariable, "$!", file(), line(), column() },
13
+ Token { Token::Type::GlobalVariable, "$!", file(), line(), column(), false },
14
14
  false },
15
15
  };
16
16
  }
data/src/node/node.cpp CHANGED
@@ -1,4 +1,5 @@
1
1
  #include "natalie_parser/node.hpp"
2
+ #include "natalie_parser/creator/debug_creator.hpp"
2
3
 
3
4
  namespace NatalieParser {
4
5
 
@@ -7,4 +8,10 @@ BlockNode &Node::as_block_node() {
7
8
  return *static_cast<BlockNode *>(this);
8
9
  }
9
10
 
11
+ void Node::debug() {
12
+ DebugCreator creator;
13
+ transform(&creator);
14
+ printf("DEBUG[type=%d]: %s\n", (int)type(), creator.to_string().c_str());
15
+ }
16
+
10
17
  }
data/src/parser.cpp CHANGED
@@ -249,7 +249,7 @@ SharedPtr<BlockNode> Parser::parse_body(LocalsHashmap &locals, Precedence preced
249
249
  validate_current_token();
250
250
  skip_newlines();
251
251
  while (!current_token().is_eof() && !is_end(current_token().type())) {
252
- if (allow_rescue && current_token().type() == Token::Type::RescueKeyword) {
252
+ if (allow_rescue && (current_token().is_rescue() || current_token().is_ensure())) {
253
253
  auto token = body->token();
254
254
  SharedPtr<BeginNode> begin_node = new BeginNode { body->token(), body };
255
255
  parse_rest_of_begin(begin_node.ref(), locals);
@@ -313,7 +313,7 @@ SharedPtr<SymbolNode> Parser::parse_alias_arg(LocalsHashmap &locals, const char
313
313
  // def bar; end
314
314
  //
315
315
  // So, we'll put the newline back.
316
- m_tokens->insert(m_index, Token { Token::Type::Newline, token.file(), token.line(), token.column() });
316
+ m_tokens->insert(m_index, Token { Token::Type::Newline, token.file(), token.line(), token.column(), token.whitespace_precedes() });
317
317
  }
318
318
  return new SymbolNode { token, new String(token.type_value()) };
319
319
  } else {
@@ -624,6 +624,11 @@ SharedPtr<Node> Parser::parse_case_in_pattern(LocalsHashmap &locals) {
624
624
  advance();
625
625
  node = new IdentifierNode { token, true };
626
626
  break;
627
+ case Token::Type::Bignum:
628
+ case Token::Type::Fixnum:
629
+ case Token::Type::Float:
630
+ node = parse_lit(locals);
631
+ break;
627
632
  case Token::Type::Caret:
628
633
  advance();
629
634
  expect(Token::Type::BareName, "pinned variable name");
@@ -633,12 +638,18 @@ SharedPtr<Node> Parser::parse_case_in_pattern(LocalsHashmap &locals) {
633
638
  case Token::Type::Constant:
634
639
  node = parse_constant(locals);
635
640
  break;
641
+ case Token::Type::DotDot:
642
+ case Token::Type::DotDotDot:
643
+ node = parse_beginless_range(locals);
644
+ break;
636
645
  case Token::Type::LBracketRBracket:
637
646
  advance();
638
647
  node = new ArrayPatternNode { token };
639
648
  break;
649
+ case Token::Type::InterpolatedStringBegin:
650
+ node = parse_interpolated_string(locals);
651
+ break;
640
652
  case Token::Type::LBracket: {
641
- // TODO: might need to keep track of and pass along precedence value?
642
653
  advance();
643
654
  SharedPtr<ArrayPatternNode> array = new ArrayPatternNode { token };
644
655
  if (current_token().is_rbracket()) {
@@ -649,7 +660,10 @@ SharedPtr<Node> Parser::parse_case_in_pattern(LocalsHashmap &locals) {
649
660
  array->add_node(parse_case_in_pattern(locals));
650
661
  while (current_token().is_comma()) {
651
662
  advance();
652
- array->add_node(parse_case_in_pattern(locals));
663
+ if (current_token().is_rbracket())
664
+ array->add_node(new SplatNode { current_token() });
665
+ else
666
+ array->add_node(parse_case_in_pattern(locals));
653
667
  }
654
668
  expect(Token::Type::RBracket, "array pattern closing bracket");
655
669
  advance();
@@ -659,47 +673,71 @@ SharedPtr<Node> Parser::parse_case_in_pattern(LocalsHashmap &locals) {
659
673
  case Token::Type::LCurlyBrace: {
660
674
  advance();
661
675
  SharedPtr<HashPatternNode> hash = new HashPatternNode { token };
676
+ node = hash.static_cast_as<Node>();
662
677
  if (current_token().type() == Token::Type::RCurlyBrace) {
663
678
  advance();
664
679
  node = hash.static_cast_as<Node>();
665
680
  break;
666
681
  }
667
- expect(Token::Type::SymbolKey, "hash pattern symbol key");
668
- hash->add_node(parse_symbol(locals));
669
- hash->add_node(parse_case_in_pattern(locals));
682
+
683
+ auto add_pair = [&]() {
684
+ auto key = parse_case_in_pattern_hash_symbol_key(locals);
685
+ hash->add_node(key);
686
+ if (key->type() == Node::Type::KeywordRestPattern) {
687
+ // nothing else to do
688
+ } else if (current_token().type() == Token::Type::RCurlyBrace || current_token().type() == Token::Type::Comma) {
689
+ if (key->type() == Node::Type::SymbolKey)
690
+ locals.set(key.static_cast_as<SymbolKeyNode>()->name().ref());
691
+ hash->add_node(new NilNode { current_token() });
692
+ } else {
693
+ hash->add_node(parse_case_in_pattern(locals));
694
+ }
695
+ };
696
+
697
+ add_pair();
698
+
670
699
  while (current_token().is_comma()) {
671
700
  advance(); // ,
672
- expect(Token::Type::Symbol, "hash pattern symbol");
673
- hash->add_node(parse_symbol(locals));
674
- hash->add_node(parse_case_in_pattern(locals));
701
+ add_pair();
675
702
  }
703
+
676
704
  expect(Token::Type::RCurlyBrace, "hash pattern closing brace");
677
705
  advance();
678
- node = hash.static_cast_as<Node>();
679
706
  break;
680
707
  }
681
- case Token::Type::Bignum:
682
- case Token::Type::Fixnum:
683
- case Token::Type::Float:
684
- node = parse_lit(locals);
708
+ case Token::Type::LParen:
709
+ advance(); // (
710
+ node = parse_case_in_pattern(locals);
711
+ expect(Token::Type::RParen, "closing paren for pattern");
712
+ advance();
713
+ break;
714
+ case Token::Type::Minus:
715
+ node = parse_unary_operator(locals);
716
+ break;
717
+ case Token::Type::NilKeyword:
718
+ node = parse_nil(locals);
685
719
  break;
686
720
  case Token::Type::Star: {
687
- auto splat_token = current_token();
688
- advance();
689
- SharedPtr<ArrayPatternNode> array = new ArrayPatternNode { token };
721
+ advance(); // *
690
722
  switch (current_token().type()) {
691
723
  case Token::Type::BareName:
692
724
  case Token::Type::Constant: {
693
- auto name = new IdentifierNode { current_token(), true };
694
- name->prepend_to_name('*');
695
- array->add_node(name);
725
+ SharedPtr<String> name = current_token().literal_string();
726
+ auto symbol = new SymbolNode { current_token(), name };
727
+ node = new SplatNode { symbol->token(), symbol };
696
728
  advance();
697
729
  break;
698
730
  }
699
731
  default:
700
- array->add_node(new SplatNode { splat_token });
732
+ node = new SplatNode { current_token() };
701
733
  }
702
- node = array.static_cast_as<Node>();
734
+ break;
735
+ }
736
+ case Token::Type::StarStar: {
737
+ SharedPtr<HashPatternNode> hash = new HashPatternNode { token };
738
+ auto key = parse_case_in_pattern_hash_symbol_key(locals);
739
+ hash->add_node(key);
740
+ node = hash.static_cast_as<Node>();
703
741
  break;
704
742
  }
705
743
  case Token::Type::String:
@@ -711,6 +749,11 @@ SharedPtr<Node> Parser::parse_case_in_pattern(LocalsHashmap &locals) {
711
749
  default:
712
750
  throw_unexpected("case in pattern");
713
751
  }
752
+
753
+ if (current_token().type() == Token::Type::DotDot || current_token().type() == Token::Type::DotDotDot) {
754
+ node = parse_range_expression(node, locals);
755
+ }
756
+
714
757
  token = current_token();
715
758
  if (token.is_hash_rocket()) {
716
759
  advance();
@@ -723,26 +766,67 @@ SharedPtr<Node> Parser::parse_case_in_pattern(LocalsHashmap &locals) {
723
766
  return node;
724
767
  }
725
768
 
769
+ SharedPtr<Node> Parser::parse_case_in_pattern_alternation(LocalsHashmap &locals) {
770
+ SharedPtr<ArrayPatternNode> array_pattern = new ArrayPatternNode { current_token() };
771
+ array_pattern->add_node(parse_case_in_pattern(locals));
772
+ while (current_token().is_comma()) {
773
+ advance();
774
+ array_pattern->add_node(parse_case_in_pattern(locals));
775
+ }
776
+ if (array_pattern->nodes().size() == 1)
777
+ return array_pattern->nodes().first();
778
+ return array_pattern.static_cast_as<Node>();
779
+ }
780
+
781
+ SharedPtr<Node> Parser::parse_case_in_pattern_hash_symbol_key(LocalsHashmap &locals) {
782
+ auto token = current_token();
783
+ SharedPtr<Node> node;
784
+ switch (token.type()) {
785
+ case Token::Type::InterpolatedStringBegin:
786
+ node = parse_interpolated_string(locals);
787
+ if (node->type() != Node::Type::SymbolKey)
788
+ throw_unexpected(token, "hash pattern symbol key");
789
+ break;
790
+ case Token::Type::StarStar:
791
+ advance(); // **
792
+ switch (current_token().type()) {
793
+ case Token::Type::NilKeyword:
794
+ node = new KeywordRestPatternNode { token, current_token().type_value() };
795
+ advance();
796
+ break;
797
+ case Token::Type::BareName: {
798
+ auto name = current_token().literal_string();
799
+ node = new KeywordRestPatternNode { token, name };
800
+ locals.set(name.ref());
801
+ advance();
802
+ break;
803
+ }
804
+ default:
805
+ node = new KeywordRestPatternNode { token };
806
+ break;
807
+ }
808
+ break;
809
+ case Token::Type::SymbolKey:
810
+ node = parse_symbol_key(locals);
811
+ break;
812
+ default:
813
+ throw_unexpected("hash pattern symbol key");
814
+ }
815
+ return node;
816
+ }
817
+
726
818
  SharedPtr<Node> Parser::parse_case_in_patterns(LocalsHashmap &locals) {
727
819
  Vector<SharedPtr<Node>> patterns;
728
- patterns.push(parse_case_in_pattern(locals));
820
+ auto pattern = parse_case_in_pattern_alternation(locals);
821
+ if (pattern->type() == Node::Type::Splat)
822
+ pattern = new ArrayPatternNode { pattern->token(), pattern };
823
+ patterns.push(pattern);
729
824
  while (current_token().type() == Token::Type::Pipe) {
730
825
  advance();
731
- patterns.push(parse_case_in_pattern(locals));
732
- }
733
- while (current_token().is_comma()) {
734
- advance();
735
- auto last_pattern = patterns.last();
736
- auto next_pattern = parse_case_in_pattern(locals);
737
- if (last_pattern->type() == Node::Type::ArrayPattern) {
738
- last_pattern.static_cast_as<ArrayPatternNode>()->add_node(next_pattern);
739
- } else {
740
- patterns.pop();
741
- auto array_pattern = new ArrayPatternNode { last_pattern->token() };
742
- array_pattern->add_node(last_pattern);
743
- array_pattern->add_node(next_pattern);
744
- patterns.push(array_pattern);
745
- }
826
+ auto pattern = parse_case_in_pattern_alternation(locals);
827
+ if (pattern->type() == Node::Type::Splat)
828
+ pattern = new ArrayPatternNode { pattern->token(), pattern };
829
+ patterns.push(pattern);
746
830
  }
747
831
  assert(patterns.size() > 0);
748
832
  if (patterns.size() == 1) {
@@ -849,13 +933,12 @@ SharedPtr<Node> Parser::parse_assignment_identifier(bool allow_splat, LocalsHash
849
933
  case Token::Type::Star: {
850
934
  if (!allow_splat)
851
935
  expect(Token::Type::BareName, "assignment identifier");
852
- auto splat_token = current_token();
853
936
  advance();
854
937
  if (current_token().is_assignable()) {
855
938
  auto id = parse_assignment_identifier(false, locals);
856
- node = new SplatNode { splat_token, id };
939
+ node = new SplatNode { token, id };
857
940
  } else {
858
- node = new SplatNode { splat_token };
941
+ node = new SplatNode { token };
859
942
  }
860
943
  break;
861
944
  }
@@ -1886,11 +1969,14 @@ SharedPtr<Node> Parser::parse_unary_operator(LocalsHashmap &locals) {
1886
1969
  advance();
1887
1970
  auto precedence = get_precedence(token);
1888
1971
  auto receiver = parse_expression(precedence, locals);
1972
+
1889
1973
  if ((token.type() == Token::Type::Minus || token.type() == Token::Type::Plus) && receiver->is_numeric()) {
1890
1974
  switch (receiver->type()) {
1891
1975
  case Node::Type::Bignum: {
1892
1976
  if (token.type() == Token::Type::Minus) {
1893
1977
  auto num = receiver.static_cast_as<BignumNode>();
1978
+ if (num->negative())
1979
+ break;
1894
1980
  num->negate();
1895
1981
  return num.static_cast_as<Node>();
1896
1982
  }
@@ -1899,6 +1985,8 @@ SharedPtr<Node> Parser::parse_unary_operator(LocalsHashmap &locals) {
1899
1985
  case Node::Type::Fixnum: {
1900
1986
  if (token.type() == Token::Type::Minus) {
1901
1987
  auto num = receiver.static_cast_as<FixnumNode>();
1988
+ if (num->negative())
1989
+ break;
1902
1990
  num->negate();
1903
1991
  return num.static_cast_as<Node>();
1904
1992
  }
@@ -1907,6 +1995,8 @@ SharedPtr<Node> Parser::parse_unary_operator(LocalsHashmap &locals) {
1907
1995
  case Node::Type::Float: {
1908
1996
  if (token.type() == Token::Type::Minus) {
1909
1997
  auto num = receiver.static_cast_as<FloatNode>();
1998
+ if (num->negative())
1999
+ break;
1910
2000
  num->negate();
1911
2001
  return num.static_cast_as<Node>();
1912
2002
  }
@@ -2100,6 +2190,12 @@ SharedPtr<Node> Parser::parse_iter_expression(SharedPtr<Node> left, LocalsHashma
2100
2190
  bool curly_brace = current_token().type() == Token::Type::LCurlyBrace;
2101
2191
  bool has_args = false;
2102
2192
  auto args = Vector<SharedPtr<Node>> {};
2193
+
2194
+ if (curly_brace) {
2195
+ if (left->type() == Node::Type::Call && !left.static_cast_as<CallNode>()->args().is_empty() && !previous_token().is_rparen())
2196
+ throw_unexpected("nearest object cannot accept a { ... } block");
2197
+ }
2198
+
2103
2199
  if (left->type() == Node::Type::StabbyProc) {
2104
2200
  advance(); // { or do
2105
2201
  auto stabby_proc_node = left.static_cast_as<StabbyProcNode>();
@@ -2193,6 +2289,15 @@ SharedPtr<NodeWithArgs> Parser::to_node_with_args(SharedPtr<Node> node) {
2193
2289
  };
2194
2290
  return call_node;
2195
2291
  }
2292
+ case Node::Type::Colon2: {
2293
+ auto colon2 = node.static_cast_as<Colon2Node>();
2294
+ auto call_node = new CallNode {
2295
+ colon2->token(),
2296
+ colon2->left(),
2297
+ colon2->name(),
2298
+ };
2299
+ return call_node;
2300
+ }
2196
2301
  case Node::Type::Call:
2197
2302
  case Node::Type::SafeCall:
2198
2303
  case Node::Type::Super:
@@ -2205,7 +2310,7 @@ SharedPtr<NodeWithArgs> Parser::to_node_with_args(SharedPtr<Node> node) {
2205
2310
  }
2206
2311
 
2207
2312
  void Parser::parse_call_args(NodeWithArgs &node, LocalsHashmap &locals, bool bare, Token::Type closing_token_type) {
2208
- if (node.can_accept_a_block())
2313
+ if (bare && node.can_accept_a_block())
2209
2314
  m_call_depth.last()++;
2210
2315
  auto arg = parse_expression(bare ? Precedence::BARE_CALL_ARG : Precedence::CALL_ARG, locals);
2211
2316
  if (current_token().is_hash_rocket() || arg->is_symbol_key()) {
@@ -2228,7 +2333,7 @@ void Parser::parse_call_args(NodeWithArgs &node, LocalsHashmap &locals, bool bar
2228
2333
  }
2229
2334
  }
2230
2335
  }
2231
- if (node.can_accept_a_block())
2336
+ if (bare && node.can_accept_a_block())
2232
2337
  m_call_depth.last()--;
2233
2338
  }
2234
2339
 
@@ -2472,15 +2577,19 @@ SharedPtr<Node> Parser::parse_proc_call_expression(SharedPtr<Node> left, LocalsH
2472
2577
 
2473
2578
  SharedPtr<Node> Parser::parse_range_expression(SharedPtr<Node> left, LocalsHashmap &locals) {
2474
2579
  auto token = current_token();
2475
- advance();
2580
+ advance(); // .. or ...
2581
+ skip_newlines();
2476
2582
  SharedPtr<Node> right;
2477
- try {
2583
+ if (current_token().can_be_range_arg_token()) {
2478
2584
  right = parse_expression(Precedence::RANGE, locals);
2479
- } catch (SyntaxError &e) {
2480
- // NOTE: I'm not sure if this is the "right" way to handle an endless range,
2481
- // but it seems to be effective for the tests I threw at it. ¯\_(ツ)_/¯
2585
+ } else {
2586
+ // endless range
2482
2587
  right = new NilNode { token };
2588
+ // HACK: insert a newline here so subsequent expressions parse ok
2589
+ if (!current_token().can_follow_collapsible_newline())
2590
+ m_tokens->insert(m_index, Token { Token::Type::Newline, current_token().file(), current_token().line(), current_token().column(), current_token().whitespace_precedes() });
2483
2591
  }
2592
+
2484
2593
  return new RangeNode { token, left, right, token.type() == Token::Type::DotDotDot };
2485
2594
  }
2486
2595
 
@@ -2767,16 +2876,19 @@ Parser::parse_left_fn Parser::left_denotation(Token &token, SharedPtr<Node> left
2767
2876
  case Type::LeftShift:
2768
2877
  case Type::LessThan:
2769
2878
  case Type::LessThanOrEqual:
2770
- case Type::Minus:
2771
2879
  case Type::NotEqual:
2772
2880
  case Type::Percent:
2773
2881
  case Type::Pipe:
2774
- case Type::Plus:
2775
2882
  case Type::RightShift:
2776
2883
  case Type::Slash:
2777
2884
  case Type::Star:
2778
2885
  case Type::StarStar:
2779
2886
  return &Parser::parse_infix_expression;
2887
+ case Type::Minus:
2888
+ case Type::Plus:
2889
+ if (peek_token().whitespace_precedes() || !left->is_callable())
2890
+ return &Parser::parse_infix_expression;
2891
+ break;
2780
2892
  case Type::DoKeyword:
2781
2893
  case Type::LCurlyBrace:
2782
2894
  return &Parser::parse_iter_expression;
@@ -2843,20 +2955,22 @@ bool Parser::is_first_arg_of_call_without_parens(SharedPtr<Node> left, Token &to
2843
2955
  return left->is_callable() && token.can_be_first_arg_of_implicit_call();
2844
2956
  }
2845
2957
 
2958
+ Token &Parser::previous_token() const {
2959
+ if (m_index > 0)
2960
+ return (*m_tokens)[m_index - 1];
2961
+ return Token::invalid();
2962
+ }
2963
+
2846
2964
  Token &Parser::current_token() const {
2847
- if (m_index < m_tokens->size()) {
2965
+ if (m_index < m_tokens->size())
2848
2966
  return m_tokens->at(m_index);
2849
- } else {
2850
- return Token::invalid();
2851
- }
2967
+ return Token::invalid();
2852
2968
  }
2853
2969
 
2854
2970
  Token &Parser::peek_token() const {
2855
- if (m_index + 1 < m_tokens->size()) {
2971
+ if (m_index + 1 < m_tokens->size())
2856
2972
  return (*m_tokens)[m_index + 1];
2857
- } else {
2858
- return Token::invalid();
2859
- }
2973
+ return Token::invalid();
2860
2974
  }
2861
2975
 
2862
2976
  void Parser::next_expression() {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: natalie_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Morgan
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-06-04 00:00:00.000000000 Z
11
+ date: 2022-06-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: NatalieParser is a zero-dependency, from-scratch, hand-written recursive
14
14
  descent parser for the Ruby Programming Language.
@@ -79,6 +79,7 @@ files:
79
79
  - include/natalie_parser/node/interpolated_symbol_node.hpp
80
80
  - include/natalie_parser/node/iter_node.hpp
81
81
  - include/natalie_parser/node/keyword_arg_node.hpp
82
+ - include/natalie_parser/node/keyword_rest_pattern_node.hpp
82
83
  - include/natalie_parser/node/keyword_splat_node.hpp
83
84
  - include/natalie_parser/node/logical_and_node.hpp
84
85
  - include/natalie_parser/node/logical_or_node.hpp