natalie_parser 2.0.0 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b35f9b98f6bbe4ec3bf8d5d9bce2c7e082504db23f442964f2a58e1ac9e11dc7
4
- data.tar.gz: 1fbf06aa6fae8400855ea0d363d77a1b27d78869266bfb17b0f45cf9b8a30d41
3
+ metadata.gz: 31bee2746c1e7a36eca16194e27467264dfbad5131075090e0b94c675ff150b1
4
+ data.tar.gz: 7f9a2a77aa2e34c56faeb740900779ff4682471cb02dbd51403eb1b7f72b13a7
5
5
  SHA512:
6
- metadata.gz: 936ec0ef70541afd5839dbe4450401d7e8713499c3536d9ac7e53eb5617671dd308eee8e4f458188a9b0890f5a7ed5d855b3011c5539b64eb31639b4140e07fc
7
- data.tar.gz: f6deb35c75e17e5c9fb49c6092e48453e904e7c7d4b515e2c7b5fe7fe69dc656da2faa6096b665b6021557ae8c47844ec65a9e8aca1933712f35d72f4db47c50
6
+ metadata.gz: 74335871743111340267d41631f3eeae0281ea5f93bb2bb4a6536682a55f325162d428d986ba0182e14adfb27e830f9622048fb443f1561ee9611c988b07bd6b
7
+ data.tar.gz: 2fd301c60b32db6c074946adceaa54fca3736f6e64335d5bb68c9ad01f092bf19f1ce2810fb75e4641e4b6cf7ebf7e77165538282107f214fa116254b0d37ac0
data/CHANGELOG.md CHANGED
@@ -1,5 +1,48 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.2.0 (2022-10-24)
4
+
5
+ - FIX: Allow backreference as first arg of implicit call
6
+ - FIX: Exclude `?` and `!` from class/instance/global variable name
7
+ - FIX: Fix parsing of hashes containing assignments
8
+ - FIX: Parse aliases of unary operators and methods named as keywords
9
+ - FIX: Parse alias of keyword-like method to assignment method
10
+ - FIX: Parse alias/undef of assignment methods
11
+ - FIX: Parse backtick as method name
12
+ - FIX: Parse class/module definition in dynamic class/module
13
+ - FIX: Parse `def !`
14
+ - FIX: Parse `def foo::bar`
15
+ - FIX: Parse `foo(<<FOO)`
16
+ - FIX: Parse `for ... do` and `while ... do`
17
+ - FIX: Parse global variables beginning with `$_`
18
+ - FIX: Parse keywords/operators as method names for `::`
19
+ - FIX: Parse `__LINE__` constant
20
+ - FIX: Parse more percent strings
21
+ - FIX: Parse more regexes
22
+ - FIX: Parse more stabby procs without parentheses
23
+ - FIX: Parse multiple parameters set to the same default value
24
+ - FIX: Parse parentheses-less stabby proc with keyword arg
25
+ - FIX: Parse undef of methods with special names
26
+ - FIX: Parse `unless ... then`
27
+ - FIX: Parse variable names starting with extended ASCII characters
28
+ - FIX: Raise SyntaxError for `def ====`
29
+ - FIX: Raise syntax error for `def +@.foo`
30
+ - FIX: Tokenize unterminated ternary at EOF
31
+ - FIX: Use lower precedence for block pass
32
+
33
+ ## 2.1.0 (2022-08-12)
34
+
35
+ - FEAT: Parse for loops
36
+ - FIX: Fix bug parsing defined? with parens
37
+ - FIX: Fix parsing of keyword splat next to other keyword args
38
+ - FIX: Parse block pass after bare/implicit hash
39
+ - FIX: Parse if statements with match conditions
40
+ - FIX: Parse regexps with leading space preceded by keywords
41
+ - FIX: Parse symbol key after super keyword
42
+ - FIX: Parse unless statements with match conditions
43
+ - FIX: Parse while/until statements with match conditions
44
+ - FIX: Reset block association level inside array
45
+
3
46
  ## 2.0.0 (2022-06-24)
4
47
 
5
48
  - FEAT: Differentiate between bare/implicit hash and explicit one
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
80
80
  case NatalieParser::Token::Type::Constant:
81
81
  case NatalieParser::Token::Type::GlobalVariable:
82
82
  case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::OperatorName:
83
84
  case NatalieParser::Token::Type::Symbol:
84
85
  case NatalieParser::Token::Type::SymbolKey: {
85
86
  auto literal = token.literal_string();
@@ -70,9 +70,9 @@ protected:
70
70
  virtual bool skip_whitespace();
71
71
  virtual Token build_next_token();
72
72
  Token consume_symbol();
73
+ SharedPtr<String> consume_word();
73
74
  Token consume_word(Token::Type type);
74
- Token consume_bare_name();
75
- Token consume_constant();
75
+ Token consume_bare_name_or_constant(Token::Type type);
76
76
  Token consume_global_variable();
77
77
  Token consume_heredoc();
78
78
  Token consume_numeric();
@@ -80,11 +80,19 @@ protected:
80
80
  Token consume_nth_ref();
81
81
  long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
82
  long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
- Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
84
84
  Token consume_single_quoted_string(char, char);
85
85
  Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
86
  Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
87
  Token consume_regexp(char start_char, char stop_char);
88
+ Token consume_percent_symbol(char start_char, char stop_char);
89
+ Token consume_interpolated_string(char start_char, char stop_char);
90
+ Token consume_interpolated_shell(char start_char, char stop_char);
91
+ Token consume_percent_lower_w(char start_char, char stop_char);
92
+ Token consume_percent_upper_w(char start_char, char stop_char);
93
+ Token consume_percent_lower_i(char start_char, char stop_char);
94
+ Token consume_percent_upper_i(char start_char, char stop_char);
95
+ Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
88
96
  SharedPtr<String> consume_non_whitespace();
89
97
 
90
98
  void utf32_codepoint_to_utf8(String &buf, long long codepoint);
@@ -95,7 +103,7 @@ protected:
95
103
  bool token_is_first_on_line() const;
96
104
 
97
105
  bool char_can_be_string_or_regexp_delimiter(char c) const {
98
- return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
106
+ return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
107
  }
100
108
 
101
109
  SharedPtr<String> m_input;
@@ -131,5 +139,10 @@ protected:
131
139
  // then increment m_pair_depth
132
140
  char m_start_char { 0 };
133
141
  int m_pair_depth { 0 };
142
+
143
+ size_t m_remaining_method_names { 0 };
144
+ bool m_allow_assignment_method { false };
145
+ Token::Type m_method_name_separator { Token::Type::Invalid };
146
+ Token m_last_method_name {};
134
147
  };
135
148
  }
@@ -0,0 +1,50 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/node/block_node.hpp"
4
+ #include "natalie_parser/node/node.hpp"
5
+ #include "natalie_parser/node/node_with_args.hpp"
6
+
7
+ namespace NatalieParser {
8
+
9
+ using namespace TM;
10
+
11
+ class ForNode : public Node {
12
+ public:
13
+ ForNode(const Token &token, SharedPtr<Node> expr, SharedPtr<Node> vars, SharedPtr<BlockNode> body)
14
+ : Node { token }
15
+ , m_expr { expr }
16
+ , m_vars { vars }
17
+ , m_body { body } {
18
+ assert(m_expr);
19
+ assert(m_vars);
20
+ }
21
+
22
+ virtual Type type() const override { return Type::For; }
23
+
24
+ const SharedPtr<Node> expr() const { return m_expr; }
25
+ const SharedPtr<Node> vars() const { return m_vars; }
26
+ const SharedPtr<BlockNode> body() const { return m_body; }
27
+
28
+ virtual void transform(Creator *creator) const override {
29
+ creator->set_type("for");
30
+ creator->append(m_expr);
31
+ switch (m_vars->type()) {
32
+ case Node::Type::Identifier:
33
+ creator->with_assignment(true, [&]() { creator->append(*m_vars); });
34
+ break;
35
+ case Node::Type::MultipleAssignment:
36
+ creator->append(m_vars);
37
+ break;
38
+ default:
39
+ TM_UNREACHABLE();
40
+ }
41
+ if (!m_body->is_empty())
42
+ creator->append(m_body->without_unnecessary_nesting());
43
+ }
44
+
45
+ protected:
46
+ SharedPtr<Node> m_expr {};
47
+ SharedPtr<Node> m_vars {};
48
+ SharedPtr<BlockNode> m_body {};
49
+ };
50
+ }
@@ -13,6 +13,12 @@ using namespace TM;
13
13
 
14
14
  class MatchNode : public Node {
15
15
  public:
16
+ MatchNode(const Token &token, SharedPtr<RegexpNode> regexp)
17
+ : Node { token }
18
+ , m_regexp { regexp } {
19
+ assert(m_regexp);
20
+ }
21
+
16
22
  MatchNode(const Token &token, SharedPtr<RegexpNode> regexp, SharedPtr<Node> arg, bool regexp_on_left)
17
23
  : Node { token }
18
24
  , m_regexp { regexp }
@@ -43,6 +43,7 @@ public:
43
43
  False,
44
44
  Fixnum,
45
45
  Float,
46
+ For,
46
47
  ForwardArgs,
47
48
  Hash,
48
49
  HashPattern,
@@ -31,6 +31,7 @@
31
31
  #include "natalie_parser/node/false_node.hpp"
32
32
  #include "natalie_parser/node/fixnum_node.hpp"
33
33
  #include "natalie_parser/node/float_node.hpp"
34
+ #include "natalie_parser/node/for_node.hpp"
34
35
  #include "natalie_parser/node/forward_args_node.hpp"
35
36
  #include "natalie_parser/node/hash_node.hpp"
36
37
  #include "natalie_parser/node/hash_pattern_node.hpp"
@@ -49,16 +49,22 @@ public:
49
49
 
50
50
  enum class Precedence;
51
51
 
52
+ enum class IterAllow {
53
+ NONE,
54
+ CURLY_ONLY,
55
+ CURLY_AND_BLOCK,
56
+ };
57
+
52
58
  SharedPtr<Node> tree();
53
59
 
54
60
  private:
55
- bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
61
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
56
62
 
57
63
  Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
64
 
59
65
  bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
66
 
61
- SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
67
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
62
68
 
63
69
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
70
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
@@ -67,8 +73,9 @@ private:
67
73
  SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
74
  SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
75
 
76
+ void reinsert_collapsed_newline();
70
77
  SharedPtr<Node> parse_alias(LocalsHashmap &);
71
- SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
78
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
72
79
  SharedPtr<Node> parse_array(LocalsHashmap &);
73
80
  SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
81
  SharedPtr<Node> parse_begin_block(LocalsHashmap &);
@@ -96,23 +103,27 @@ private:
96
103
  Method,
97
104
  Proc,
98
105
  };
99
- void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
106
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
107
+ SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
100
108
 
101
109
  SharedPtr<Node> parse_encoding(LocalsHashmap &);
102
110
  SharedPtr<Node> parse_end_block(LocalsHashmap &);
103
111
  SharedPtr<Node> parse_file_constant(LocalsHashmap &);
112
+ SharedPtr<Node> parse_for(LocalsHashmap &);
104
113
  SharedPtr<Node> parse_forward_args(LocalsHashmap &);
105
114
  SharedPtr<Node> parse_group(LocalsHashmap &);
106
115
  SharedPtr<Node> parse_hash(LocalsHashmap &);
107
116
  SharedPtr<Node> parse_hash_inner(LocalsHashmap &, Precedence, Token::Type, bool, SharedPtr<Node> = {});
108
117
  SharedPtr<Node> parse_identifier(LocalsHashmap &);
109
118
  SharedPtr<Node> parse_if(LocalsHashmap &);
119
+ SharedPtr<Node> parse_if_branch(LocalsHashmap &, bool);
110
120
  void parse_interpolated_body(LocalsHashmap &, InterpolatedNode &, Token::Type);
111
121
  SharedPtr<Node> parse_interpolated_regexp(LocalsHashmap &);
112
122
  int parse_regexp_options(String &);
113
123
  SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
114
124
  SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
115
125
  SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
126
+ SharedPtr<Node> parse_line_constant(LocalsHashmap &);
116
127
  SharedPtr<Node> parse_lit(LocalsHashmap &);
117
128
  SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
118
129
  SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
@@ -122,7 +133,7 @@ private:
122
133
  SharedPtr<Node> parse_nil(LocalsHashmap &);
123
134
  SharedPtr<Node> parse_not(LocalsHashmap &);
124
135
  SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
125
- void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
136
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
126
137
  SharedPtr<Node> parse_redo(LocalsHashmap &);
127
138
  SharedPtr<Node> parse_retry(LocalsHashmap &);
128
139
  SharedPtr<Node> parse_return(LocalsHashmap &);
@@ -103,6 +103,7 @@ public:
103
103
  NotKeyword,
104
104
  NotMatch,
105
105
  NthRef,
106
+ OperatorName,
106
107
  OrKeyword,
107
108
  Percent,
108
109
  PercentEqual,
@@ -431,6 +432,8 @@ public:
431
432
  return "!";
432
433
  case Type::NthRef:
433
434
  return "nth_ref";
435
+ case Type::OperatorName:
436
+ return "operator";
434
437
  case Type::OrKeyword:
435
438
  return "or";
436
439
  case Type::Percent:
@@ -614,6 +617,7 @@ public:
614
617
  case Token::Type::LessThanOrEqual:
615
618
  case Token::Type::Match:
616
619
  case Token::Type::Minus:
620
+ case Token::Type::Not:
617
621
  case Token::Type::NotEqual:
618
622
  case Token::Type::NotMatch:
619
623
  case Token::Type::Percent:
@@ -635,6 +639,7 @@ public:
635
639
  bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
636
640
  bool is_comma() const { return m_type == Type::Comma; }
637
641
  bool is_comment() const { return m_type == Type::Comment; }
642
+ bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
638
643
  bool is_def_keyword() const { return m_type == Type::DefKeyword; }
639
644
  bool is_doc() const { return m_type == Type::Doc; }
640
645
  bool is_dot() const { return m_type == Type::Dot; }
@@ -743,9 +748,23 @@ public:
743
748
  }
744
749
  }
745
750
 
751
+ bool can_be_first_arg_of_def() const {
752
+ switch (m_type) {
753
+ case Token::Type::Ampersand:
754
+ case Token::Type::BareName:
755
+ case Token::Type::Star:
756
+ case Token::Type::StarStar:
757
+ case Token::Type::SymbolKey:
758
+ return true;
759
+ default:
760
+ return false;
761
+ }
762
+ }
763
+
746
764
  bool can_be_first_arg_of_implicit_call() const {
747
765
  switch (m_type) {
748
766
  case Token::Type::Arrow:
767
+ case Token::Type::BackRef:
749
768
  case Token::Type::BareName:
750
769
  case Token::Type::BeginKeyword:
751
770
  case Token::Type::Bignum:
@@ -800,6 +819,7 @@ public:
800
819
  switch (m_type) {
801
820
  case Token::Type::Equal:
802
821
  case Token::Type::LBracket:
822
+ case Token::Type::LParen:
803
823
  return true;
804
824
  default:
805
825
  if (is_operator())
@@ -808,8 +828,25 @@ public:
808
828
  }
809
829
  }
810
830
 
831
+ bool can_precede_regexp_literal() const {
832
+ switch (m_type) {
833
+ case Type::ElsifKeyword:
834
+ case Type::IfKeyword:
835
+ case Type::RescueKeyword:
836
+ case Type::ReturnKeyword:
837
+ case Type::UnlessKeyword:
838
+ case Type::UntilKeyword:
839
+ case Type::WhenKeyword:
840
+ case Type::WhileKeyword:
841
+ return true;
842
+ default:
843
+ return false;
844
+ }
845
+ }
846
+
811
847
  bool can_precede_symbol_key() const {
812
848
  switch (m_type) {
849
+ case Type::Arrow:
813
850
  case Type::BareName:
814
851
  case Type::Comma:
815
852
  case Type::Constant:
@@ -818,6 +855,7 @@ public:
818
855
  case Type::LParen:
819
856
  case Type::Pipe:
820
857
  case Type::PipePipe:
858
+ case Type::SuperKeyword:
821
859
  return true;
822
860
  default:
823
861
  return false;
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class NatalieParser
4
- VERSION = '2.0.0'
4
+ VERSION = '2.2.0'
5
5
  end
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
22
22
  Token InterpolatedStringLexer::consume_string() {
23
23
  SharedPtr<String> buf = new String;
24
24
  while (auto c = current_char()) {
25
- if (c == '\\') {
25
+ if (c == '\\' && m_stop_char != '\\') {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
38
38
  Token RegexpLexer::consume_regexp() {
39
39
  SharedPtr<String> buf = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  switch (c) {
44
44
  case '/':
@@ -38,7 +38,7 @@ Token WordArrayLexer::build_next_token() {
38
38
  Token WordArrayLexer::consume_array() {
39
39
  m_buffer = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  advance();
44
44
  if (c == ' ') {