natalie_parser 2.0.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b35f9b98f6bbe4ec3bf8d5d9bce2c7e082504db23f442964f2a58e1ac9e11dc7
4
- data.tar.gz: 1fbf06aa6fae8400855ea0d363d77a1b27d78869266bfb17b0f45cf9b8a30d41
3
+ metadata.gz: 31bee2746c1e7a36eca16194e27467264dfbad5131075090e0b94c675ff150b1
4
+ data.tar.gz: 7f9a2a77aa2e34c56faeb740900779ff4682471cb02dbd51403eb1b7f72b13a7
5
5
  SHA512:
6
- metadata.gz: 936ec0ef70541afd5839dbe4450401d7e8713499c3536d9ac7e53eb5617671dd308eee8e4f458188a9b0890f5a7ed5d855b3011c5539b64eb31639b4140e07fc
7
- data.tar.gz: f6deb35c75e17e5c9fb49c6092e48453e904e7c7d4b515e2c7b5fe7fe69dc656da2faa6096b665b6021557ae8c47844ec65a9e8aca1933712f35d72f4db47c50
6
+ metadata.gz: 74335871743111340267d41631f3eeae0281ea5f93bb2bb4a6536682a55f325162d428d986ba0182e14adfb27e830f9622048fb443f1561ee9611c988b07bd6b
7
+ data.tar.gz: 2fd301c60b32db6c074946adceaa54fca3736f6e64335d5bb68c9ad01f092bf19f1ce2810fb75e4641e4b6cf7ebf7e77165538282107f214fa116254b0d37ac0
data/CHANGELOG.md CHANGED
@@ -1,5 +1,48 @@
1
1
  # Changelog
2
2
 
3
+ ## 2.2.0 (2022-10-24)
4
+
5
+ - FIX: Allow backreference as first arg of implicit call
6
+ - FIX: Exclude `?` and `!` from class/instance/global variable name
7
+ - FIX: Fix parsing of hashes containing assignments
8
+ - FIX: Parse aliases of unary operators and methods named as keywords
9
+ - FIX: Parse alias of keyword-like method to assignment method
10
+ - FIX: Parse alias/undef of assignment methods
11
+ - FIX: Parse backtick as method name
12
+ - FIX: Parse class/module definition in dynamic class/module
13
+ - FIX: Parse `def !`
14
+ - FIX: Parse `def foo::bar`
15
+ - FIX: Parse `foo(<<FOO)`
16
+ - FIX: Parse `for ... do` and `while ... do`
17
+ - FIX: Parse global variables beginning with `$_`
18
+ - FIX: Parse keywords/operators as method names for `::`
19
+ - FIX: Parse `__LINE__` constant
20
+ - FIX: Parse more percent strings
21
+ - FIX: Parse more regexes
22
+ - FIX: Parse more stabby procs without parentheses
23
+ - FIX: Parse multiple parameters set to the same default value
24
+ - FIX: Parse parentheses-less stabby proc with keyword arg
25
+ - FIX: Parse undef of methods with special names
26
+ - FIX: Parse `unless ... then`
27
+ - FIX: Parse variable names starting with extended ASCII characters
28
+ - FIX: Raise SyntaxError for `def ====`
29
+ - FIX: Raise syntax error for `def +@.foo`
30
+ - FIX: Tokenize unterminated ternary at EOF
31
+ - FIX: Use lower precedence for block pass
32
+
33
+ ## 2.1.0 (2022-08-12)
34
+
35
+ - FEAT: Parse for loops
36
+ - FIX: Fix bug parsing defined? with parens
37
+ - FIX: Fix parsing of keyword splat next to other keyword args
38
+ - FIX: Parse block pass after bare/implicit hash
39
+ - FIX: Parse if statements with match conditions
40
+ - FIX: Parse regexps with leading space preceded by keywords
41
+ - FIX: Parse symbol key after super keyword
42
+ - FIX: Parse unless statements with match conditions
43
+ - FIX: Parse while/until statements with match conditions
44
+ - FIX: Reset block association level inside array
45
+
3
46
  ## 2.0.0 (2022-06-24)
4
47
 
5
48
  - FEAT: Differentiate between bare/implicit hash and explicit one
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
80
80
  case NatalieParser::Token::Type::Constant:
81
81
  case NatalieParser::Token::Type::GlobalVariable:
82
82
  case NatalieParser::Token::Type::InstanceVariable:
83
+ case NatalieParser::Token::Type::OperatorName:
83
84
  case NatalieParser::Token::Type::Symbol:
84
85
  case NatalieParser::Token::Type::SymbolKey: {
85
86
  auto literal = token.literal_string();
@@ -70,9 +70,9 @@ protected:
70
70
  virtual bool skip_whitespace();
71
71
  virtual Token build_next_token();
72
72
  Token consume_symbol();
73
+ SharedPtr<String> consume_word();
73
74
  Token consume_word(Token::Type type);
74
- Token consume_bare_name();
75
- Token consume_constant();
75
+ Token consume_bare_name_or_constant(Token::Type type);
76
76
  Token consume_global_variable();
77
77
  Token consume_heredoc();
78
78
  Token consume_numeric();
@@ -80,11 +80,19 @@ protected:
80
80
  Token consume_nth_ref();
81
81
  long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
82
  long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
- Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
84
84
  Token consume_single_quoted_string(char, char);
85
85
  Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
86
  Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
87
  Token consume_regexp(char start_char, char stop_char);
88
+ Token consume_percent_symbol(char start_char, char stop_char);
89
+ Token consume_interpolated_string(char start_char, char stop_char);
90
+ Token consume_interpolated_shell(char start_char, char stop_char);
91
+ Token consume_percent_lower_w(char start_char, char stop_char);
92
+ Token consume_percent_upper_w(char start_char, char stop_char);
93
+ Token consume_percent_lower_i(char start_char, char stop_char);
94
+ Token consume_percent_upper_i(char start_char, char stop_char);
95
+ Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
88
96
  SharedPtr<String> consume_non_whitespace();
89
97
 
90
98
  void utf32_codepoint_to_utf8(String &buf, long long codepoint);
@@ -95,7 +103,7 @@ protected:
95
103
  bool token_is_first_on_line() const;
96
104
 
97
105
  bool char_can_be_string_or_regexp_delimiter(char c) const {
98
- return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
106
+ return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
107
  }
100
108
 
101
109
  SharedPtr<String> m_input;
@@ -131,5 +139,10 @@ protected:
131
139
  // then increment m_pair_depth
132
140
  char m_start_char { 0 };
133
141
  int m_pair_depth { 0 };
142
+
143
+ size_t m_remaining_method_names { 0 };
144
+ bool m_allow_assignment_method { false };
145
+ Token::Type m_method_name_separator { Token::Type::Invalid };
146
+ Token m_last_method_name {};
134
147
  };
135
148
  }
@@ -0,0 +1,50 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/node/block_node.hpp"
4
+ #include "natalie_parser/node/node.hpp"
5
+ #include "natalie_parser/node/node_with_args.hpp"
6
+
7
+ namespace NatalieParser {
8
+
9
+ using namespace TM;
10
+
11
+ class ForNode : public Node {
12
+ public:
13
+ ForNode(const Token &token, SharedPtr<Node> expr, SharedPtr<Node> vars, SharedPtr<BlockNode> body)
14
+ : Node { token }
15
+ , m_expr { expr }
16
+ , m_vars { vars }
17
+ , m_body { body } {
18
+ assert(m_expr);
19
+ assert(m_vars);
20
+ }
21
+
22
+ virtual Type type() const override { return Type::For; }
23
+
24
+ const SharedPtr<Node> expr() const { return m_expr; }
25
+ const SharedPtr<Node> vars() const { return m_vars; }
26
+ const SharedPtr<BlockNode> body() const { return m_body; }
27
+
28
+ virtual void transform(Creator *creator) const override {
29
+ creator->set_type("for");
30
+ creator->append(m_expr);
31
+ switch (m_vars->type()) {
32
+ case Node::Type::Identifier:
33
+ creator->with_assignment(true, [&]() { creator->append(*m_vars); });
34
+ break;
35
+ case Node::Type::MultipleAssignment:
36
+ creator->append(m_vars);
37
+ break;
38
+ default:
39
+ TM_UNREACHABLE();
40
+ }
41
+ if (!m_body->is_empty())
42
+ creator->append(m_body->without_unnecessary_nesting());
43
+ }
44
+
45
+ protected:
46
+ SharedPtr<Node> m_expr {};
47
+ SharedPtr<Node> m_vars {};
48
+ SharedPtr<BlockNode> m_body {};
49
+ };
50
+ }
@@ -13,6 +13,12 @@ using namespace TM;
13
13
 
14
14
  class MatchNode : public Node {
15
15
  public:
16
+ MatchNode(const Token &token, SharedPtr<RegexpNode> regexp)
17
+ : Node { token }
18
+ , m_regexp { regexp } {
19
+ assert(m_regexp);
20
+ }
21
+
16
22
  MatchNode(const Token &token, SharedPtr<RegexpNode> regexp, SharedPtr<Node> arg, bool regexp_on_left)
17
23
  : Node { token }
18
24
  , m_regexp { regexp }
@@ -43,6 +43,7 @@ public:
43
43
  False,
44
44
  Fixnum,
45
45
  Float,
46
+ For,
46
47
  ForwardArgs,
47
48
  Hash,
48
49
  HashPattern,
@@ -31,6 +31,7 @@
31
31
  #include "natalie_parser/node/false_node.hpp"
32
32
  #include "natalie_parser/node/fixnum_node.hpp"
33
33
  #include "natalie_parser/node/float_node.hpp"
34
+ #include "natalie_parser/node/for_node.hpp"
34
35
  #include "natalie_parser/node/forward_args_node.hpp"
35
36
  #include "natalie_parser/node/hash_node.hpp"
36
37
  #include "natalie_parser/node/hash_pattern_node.hpp"
@@ -49,16 +49,22 @@ public:
49
49
 
50
50
  enum class Precedence;
51
51
 
52
+ enum class IterAllow {
53
+ NONE,
54
+ CURLY_ONLY,
55
+ CURLY_AND_BLOCK,
56
+ };
57
+
52
58
  SharedPtr<Node> tree();
53
59
 
54
60
  private:
55
- bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
61
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
56
62
 
57
63
  Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
64
 
59
65
  bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
66
 
61
- SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
67
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
62
68
 
63
69
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
70
  SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
@@ -67,8 +73,9 @@ private:
67
73
  SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
74
  SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
75
 
76
+ void reinsert_collapsed_newline();
70
77
  SharedPtr<Node> parse_alias(LocalsHashmap &);
71
- SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
78
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
72
79
  SharedPtr<Node> parse_array(LocalsHashmap &);
73
80
  SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
81
  SharedPtr<Node> parse_begin_block(LocalsHashmap &);
@@ -96,23 +103,27 @@ private:
96
103
  Method,
97
104
  Proc,
98
105
  };
99
- void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
106
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
107
+ SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
100
108
 
101
109
  SharedPtr<Node> parse_encoding(LocalsHashmap &);
102
110
  SharedPtr<Node> parse_end_block(LocalsHashmap &);
103
111
  SharedPtr<Node> parse_file_constant(LocalsHashmap &);
112
+ SharedPtr<Node> parse_for(LocalsHashmap &);
104
113
  SharedPtr<Node> parse_forward_args(LocalsHashmap &);
105
114
  SharedPtr<Node> parse_group(LocalsHashmap &);
106
115
  SharedPtr<Node> parse_hash(LocalsHashmap &);
107
116
  SharedPtr<Node> parse_hash_inner(LocalsHashmap &, Precedence, Token::Type, bool, SharedPtr<Node> = {});
108
117
  SharedPtr<Node> parse_identifier(LocalsHashmap &);
109
118
  SharedPtr<Node> parse_if(LocalsHashmap &);
119
+ SharedPtr<Node> parse_if_branch(LocalsHashmap &, bool);
110
120
  void parse_interpolated_body(LocalsHashmap &, InterpolatedNode &, Token::Type);
111
121
  SharedPtr<Node> parse_interpolated_regexp(LocalsHashmap &);
112
122
  int parse_regexp_options(String &);
113
123
  SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
114
124
  SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
115
125
  SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
126
+ SharedPtr<Node> parse_line_constant(LocalsHashmap &);
116
127
  SharedPtr<Node> parse_lit(LocalsHashmap &);
117
128
  SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
118
129
  SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
@@ -122,7 +133,7 @@ private:
122
133
  SharedPtr<Node> parse_nil(LocalsHashmap &);
123
134
  SharedPtr<Node> parse_not(LocalsHashmap &);
124
135
  SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
125
- void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
136
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
126
137
  SharedPtr<Node> parse_redo(LocalsHashmap &);
127
138
  SharedPtr<Node> parse_retry(LocalsHashmap &);
128
139
  SharedPtr<Node> parse_return(LocalsHashmap &);
@@ -103,6 +103,7 @@ public:
103
103
  NotKeyword,
104
104
  NotMatch,
105
105
  NthRef,
106
+ OperatorName,
106
107
  OrKeyword,
107
108
  Percent,
108
109
  PercentEqual,
@@ -431,6 +432,8 @@ public:
431
432
  return "!";
432
433
  case Type::NthRef:
433
434
  return "nth_ref";
435
+ case Type::OperatorName:
436
+ return "operator";
434
437
  case Type::OrKeyword:
435
438
  return "or";
436
439
  case Type::Percent:
@@ -614,6 +617,7 @@ public:
614
617
  case Token::Type::LessThanOrEqual:
615
618
  case Token::Type::Match:
616
619
  case Token::Type::Minus:
620
+ case Token::Type::Not:
617
621
  case Token::Type::NotEqual:
618
622
  case Token::Type::NotMatch:
619
623
  case Token::Type::Percent:
@@ -635,6 +639,7 @@ public:
635
639
  bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
636
640
  bool is_comma() const { return m_type == Type::Comma; }
637
641
  bool is_comment() const { return m_type == Type::Comment; }
642
+ bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
638
643
  bool is_def_keyword() const { return m_type == Type::DefKeyword; }
639
644
  bool is_doc() const { return m_type == Type::Doc; }
640
645
  bool is_dot() const { return m_type == Type::Dot; }
@@ -743,9 +748,23 @@ public:
743
748
  }
744
749
  }
745
750
 
751
+ bool can_be_first_arg_of_def() const {
752
+ switch (m_type) {
753
+ case Token::Type::Ampersand:
754
+ case Token::Type::BareName:
755
+ case Token::Type::Star:
756
+ case Token::Type::StarStar:
757
+ case Token::Type::SymbolKey:
758
+ return true;
759
+ default:
760
+ return false;
761
+ }
762
+ }
763
+
746
764
  bool can_be_first_arg_of_implicit_call() const {
747
765
  switch (m_type) {
748
766
  case Token::Type::Arrow:
767
+ case Token::Type::BackRef:
749
768
  case Token::Type::BareName:
750
769
  case Token::Type::BeginKeyword:
751
770
  case Token::Type::Bignum:
@@ -800,6 +819,7 @@ public:
800
819
  switch (m_type) {
801
820
  case Token::Type::Equal:
802
821
  case Token::Type::LBracket:
822
+ case Token::Type::LParen:
803
823
  return true;
804
824
  default:
805
825
  if (is_operator())
@@ -808,8 +828,25 @@ public:
808
828
  }
809
829
  }
810
830
 
831
+ bool can_precede_regexp_literal() const {
832
+ switch (m_type) {
833
+ case Type::ElsifKeyword:
834
+ case Type::IfKeyword:
835
+ case Type::RescueKeyword:
836
+ case Type::ReturnKeyword:
837
+ case Type::UnlessKeyword:
838
+ case Type::UntilKeyword:
839
+ case Type::WhenKeyword:
840
+ case Type::WhileKeyword:
841
+ return true;
842
+ default:
843
+ return false;
844
+ }
845
+ }
846
+
811
847
  bool can_precede_symbol_key() const {
812
848
  switch (m_type) {
849
+ case Type::Arrow:
813
850
  case Type::BareName:
814
851
  case Type::Comma:
815
852
  case Type::Constant:
@@ -818,6 +855,7 @@ public:
818
855
  case Type::LParen:
819
856
  case Type::Pipe:
820
857
  case Type::PipePipe:
858
+ case Type::SuperKeyword:
821
859
  return true;
822
860
  default:
823
861
  return false;
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class NatalieParser
4
- VERSION = '2.0.0'
4
+ VERSION = '2.2.0'
5
5
  end
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
22
22
  Token InterpolatedStringLexer::consume_string() {
23
23
  SharedPtr<String> buf = new String;
24
24
  while (auto c = current_char()) {
25
- if (c == '\\') {
25
+ if (c == '\\' && m_stop_char != '\\') {
26
26
  advance(); // backslash
27
27
  auto result = consume_escaped_byte(*buf);
28
28
  if (!result.first)
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
38
38
  Token RegexpLexer::consume_regexp() {
39
39
  SharedPtr<String> buf = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  switch (c) {
44
44
  case '/':
@@ -38,7 +38,7 @@ Token WordArrayLexer::build_next_token() {
38
38
  Token WordArrayLexer::consume_array() {
39
39
  m_buffer = new String;
40
40
  while (auto c = current_char()) {
41
- if (c == '\\') {
41
+ if (c == '\\' && m_stop_char != '\\') {
42
42
  c = next();
43
43
  advance();
44
44
  if (c == ' ') {