natalie_parser 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/ext/natalie_parser/natalie_parser.cpp +1 -0
- data/include/natalie_parser/lexer.hpp +17 -4
- data/include/natalie_parser/node/for_node.hpp +50 -0
- data/include/natalie_parser/node/match_node.hpp +6 -0
- data/include/natalie_parser/node/node.hpp +1 -0
- data/include/natalie_parser/node.hpp +1 -0
- data/include/natalie_parser/parser.hpp +16 -5
- data/include/natalie_parser/token.hpp +38 -0
- data/lib/natalie_parser/version.rb +1 -1
- data/src/lexer/interpolated_string_lexer.cpp +1 -1
- data/src/lexer/regexp_lexer.cpp +1 -1
- data/src/lexer/word_array_lexer.cpp +1 -1
- data/src/lexer.cpp +170 -248
- data/src/node/match_node.cpp +5 -0
- data/src/parser.cpp +195 -105
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31bee2746c1e7a36eca16194e27467264dfbad5131075090e0b94c675ff150b1
|
4
|
+
data.tar.gz: 7f9a2a77aa2e34c56faeb740900779ff4682471cb02dbd51403eb1b7f72b13a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74335871743111340267d41631f3eeae0281ea5f93bb2bb4a6536682a55f325162d428d986ba0182e14adfb27e830f9622048fb443f1561ee9611c988b07bd6b
|
7
|
+
data.tar.gz: 2fd301c60b32db6c074946adceaa54fca3736f6e64335d5bb68c9ad01f092bf19f1ce2810fb75e4641e4b6cf7ebf7e77165538282107f214fa116254b0d37ac0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,48 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.2.0 (2022-10-24)
|
4
|
+
|
5
|
+
- FIX: Allow backreference as first arg of implicit call
|
6
|
+
- FIX: Exclude `?` and `!` from class/instance/global variable name
|
7
|
+
- FIX: Fix parsing of hashes containing assignments
|
8
|
+
- FIX: Parse aliases of unary operators and methods named as keywords
|
9
|
+
- FIX: Parse alias of keyword-like method to assignment method
|
10
|
+
- FIX: Parse alias/undef of assignment methods
|
11
|
+
- FIX: Parse backtick as method name
|
12
|
+
- FIX: Parse class/module definition in dynamic class/module
|
13
|
+
- FIX: Parse `def !`
|
14
|
+
- FIX: Parse `def foo::bar`
|
15
|
+
- FIX: Parse `foo(<<FOO)`
|
16
|
+
- FIX: Parse `for ... do` and `while ... do`
|
17
|
+
- FIX: Parse global variables beginning with `$_`
|
18
|
+
- FIX: Parse keywords/operators as method names for `::`
|
19
|
+
- FIX: Parse `__LINE__` constant
|
20
|
+
- FIX: Parse more percent strings
|
21
|
+
- FIX: Parse more regexes
|
22
|
+
- FIX: Parse more stabby procs without parentheses
|
23
|
+
- FIX: Parse multiple parameters set to the same default value
|
24
|
+
- FIX: Parse parentheses-less stabby proc with keyword arg
|
25
|
+
- FIX: Parse undef of methods with special names
|
26
|
+
- FIX: Parse `unless ... then`
|
27
|
+
- FIX: Parse variable names starting with extended ASCII characters
|
28
|
+
- FIX: Raise SyntaxError for `def ====`
|
29
|
+
- FIX: Raise syntax error for `def +@.foo`
|
30
|
+
- FIX: Tokenize unterminated ternary at EOF
|
31
|
+
- FIX: Use lower precedence for block pass
|
32
|
+
|
33
|
+
## 2.1.0 (2022-08-12)
|
34
|
+
|
35
|
+
- FEAT: Parse for loops
|
36
|
+
- FIX: Fix bug parsing defined? with parens
|
37
|
+
- FIX: Fix parsing of keyword splat next to other keyword args
|
38
|
+
- FIX: Parse block pass after bare/implicit hash
|
39
|
+
- FIX: Parse if statements with match conditions
|
40
|
+
- FIX: Parse regexps with leading space preceded by keywords
|
41
|
+
- FIX: Parse symbol key after super keyword
|
42
|
+
- FIX: Parse unless statements with match conditions
|
43
|
+
- FIX: Parse while/until statements with match conditions
|
44
|
+
- FIX: Reset block association level inside array
|
45
|
+
|
3
46
|
## 2.0.0 (2022-06-24)
|
4
47
|
|
5
48
|
- FEAT: Differentiate between bare/implicit hash and explicit one
|
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
|
|
80
80
|
case NatalieParser::Token::Type::Constant:
|
81
81
|
case NatalieParser::Token::Type::GlobalVariable:
|
82
82
|
case NatalieParser::Token::Type::InstanceVariable:
|
83
|
+
case NatalieParser::Token::Type::OperatorName:
|
83
84
|
case NatalieParser::Token::Type::Symbol:
|
84
85
|
case NatalieParser::Token::Type::SymbolKey: {
|
85
86
|
auto literal = token.literal_string();
|
@@ -70,9 +70,9 @@ protected:
|
|
70
70
|
virtual bool skip_whitespace();
|
71
71
|
virtual Token build_next_token();
|
72
72
|
Token consume_symbol();
|
73
|
+
SharedPtr<String> consume_word();
|
73
74
|
Token consume_word(Token::Type type);
|
74
|
-
Token
|
75
|
-
Token consume_constant();
|
75
|
+
Token consume_bare_name_or_constant(Token::Type type);
|
76
76
|
Token consume_global_variable();
|
77
77
|
Token consume_heredoc();
|
78
78
|
Token consume_numeric();
|
@@ -80,11 +80,19 @@ protected:
|
|
80
80
|
Token consume_nth_ref();
|
81
81
|
long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
|
82
82
|
long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
|
83
|
-
Token consume_double_quoted_string(char, char, Token::Type begin_type
|
83
|
+
Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
|
84
84
|
Token consume_single_quoted_string(char, char);
|
85
85
|
Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
|
86
86
|
Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
|
87
87
|
Token consume_regexp(char start_char, char stop_char);
|
88
|
+
Token consume_percent_symbol(char start_char, char stop_char);
|
89
|
+
Token consume_interpolated_string(char start_char, char stop_char);
|
90
|
+
Token consume_interpolated_shell(char start_char, char stop_char);
|
91
|
+
Token consume_percent_lower_w(char start_char, char stop_char);
|
92
|
+
Token consume_percent_upper_w(char start_char, char stop_char);
|
93
|
+
Token consume_percent_lower_i(char start_char, char stop_char);
|
94
|
+
Token consume_percent_upper_i(char start_char, char stop_char);
|
95
|
+
Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
|
88
96
|
SharedPtr<String> consume_non_whitespace();
|
89
97
|
|
90
98
|
void utf32_codepoint_to_utf8(String &buf, long long codepoint);
|
@@ -95,7 +103,7 @@ protected:
|
|
95
103
|
bool token_is_first_on_line() const;
|
96
104
|
|
97
105
|
bool char_can_be_string_or_regexp_delimiter(char c) const {
|
98
|
-
return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
|
106
|
+
return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
|
99
107
|
}
|
100
108
|
|
101
109
|
SharedPtr<String> m_input;
|
@@ -131,5 +139,10 @@ protected:
|
|
131
139
|
// then increment m_pair_depth
|
132
140
|
char m_start_char { 0 };
|
133
141
|
int m_pair_depth { 0 };
|
142
|
+
|
143
|
+
size_t m_remaining_method_names { 0 };
|
144
|
+
bool m_allow_assignment_method { false };
|
145
|
+
Token::Type m_method_name_separator { Token::Type::Invalid };
|
146
|
+
Token m_last_method_name {};
|
134
147
|
};
|
135
148
|
}
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "natalie_parser/node/block_node.hpp"
|
4
|
+
#include "natalie_parser/node/node.hpp"
|
5
|
+
#include "natalie_parser/node/node_with_args.hpp"
|
6
|
+
|
7
|
+
namespace NatalieParser {
|
8
|
+
|
9
|
+
using namespace TM;
|
10
|
+
|
11
|
+
class ForNode : public Node {
|
12
|
+
public:
|
13
|
+
ForNode(const Token &token, SharedPtr<Node> expr, SharedPtr<Node> vars, SharedPtr<BlockNode> body)
|
14
|
+
: Node { token }
|
15
|
+
, m_expr { expr }
|
16
|
+
, m_vars { vars }
|
17
|
+
, m_body { body } {
|
18
|
+
assert(m_expr);
|
19
|
+
assert(m_vars);
|
20
|
+
}
|
21
|
+
|
22
|
+
virtual Type type() const override { return Type::For; }
|
23
|
+
|
24
|
+
const SharedPtr<Node> expr() const { return m_expr; }
|
25
|
+
const SharedPtr<Node> vars() const { return m_vars; }
|
26
|
+
const SharedPtr<BlockNode> body() const { return m_body; }
|
27
|
+
|
28
|
+
virtual void transform(Creator *creator) const override {
|
29
|
+
creator->set_type("for");
|
30
|
+
creator->append(m_expr);
|
31
|
+
switch (m_vars->type()) {
|
32
|
+
case Node::Type::Identifier:
|
33
|
+
creator->with_assignment(true, [&]() { creator->append(*m_vars); });
|
34
|
+
break;
|
35
|
+
case Node::Type::MultipleAssignment:
|
36
|
+
creator->append(m_vars);
|
37
|
+
break;
|
38
|
+
default:
|
39
|
+
TM_UNREACHABLE();
|
40
|
+
}
|
41
|
+
if (!m_body->is_empty())
|
42
|
+
creator->append(m_body->without_unnecessary_nesting());
|
43
|
+
}
|
44
|
+
|
45
|
+
protected:
|
46
|
+
SharedPtr<Node> m_expr {};
|
47
|
+
SharedPtr<Node> m_vars {};
|
48
|
+
SharedPtr<BlockNode> m_body {};
|
49
|
+
};
|
50
|
+
}
|
@@ -13,6 +13,12 @@ using namespace TM;
|
|
13
13
|
|
14
14
|
class MatchNode : public Node {
|
15
15
|
public:
|
16
|
+
MatchNode(const Token &token, SharedPtr<RegexpNode> regexp)
|
17
|
+
: Node { token }
|
18
|
+
, m_regexp { regexp } {
|
19
|
+
assert(m_regexp);
|
20
|
+
}
|
21
|
+
|
16
22
|
MatchNode(const Token &token, SharedPtr<RegexpNode> regexp, SharedPtr<Node> arg, bool regexp_on_left)
|
17
23
|
: Node { token }
|
18
24
|
, m_regexp { regexp }
|
@@ -31,6 +31,7 @@
|
|
31
31
|
#include "natalie_parser/node/false_node.hpp"
|
32
32
|
#include "natalie_parser/node/fixnum_node.hpp"
|
33
33
|
#include "natalie_parser/node/float_node.hpp"
|
34
|
+
#include "natalie_parser/node/for_node.hpp"
|
34
35
|
#include "natalie_parser/node/forward_args_node.hpp"
|
35
36
|
#include "natalie_parser/node/hash_node.hpp"
|
36
37
|
#include "natalie_parser/node/hash_pattern_node.hpp"
|
@@ -49,16 +49,22 @@ public:
|
|
49
49
|
|
50
50
|
enum class Precedence;
|
51
51
|
|
52
|
+
enum class IterAllow {
|
53
|
+
NONE,
|
54
|
+
CURLY_ONLY,
|
55
|
+
CURLY_AND_BLOCK,
|
56
|
+
};
|
57
|
+
|
52
58
|
SharedPtr<Node> tree();
|
53
59
|
|
54
60
|
private:
|
55
|
-
bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
|
61
|
+
bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
|
56
62
|
|
57
63
|
Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
|
58
64
|
|
59
65
|
bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
|
60
66
|
|
61
|
-
SharedPtr<Node> parse_expression(Precedence, LocalsHashmap
|
67
|
+
SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
|
62
68
|
|
63
69
|
SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
|
64
70
|
SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
|
@@ -67,8 +73,9 @@ private:
|
|
67
73
|
SharedPtr<Node> parse_if_body(LocalsHashmap &);
|
68
74
|
SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
|
69
75
|
|
76
|
+
void reinsert_collapsed_newline();
|
70
77
|
SharedPtr<Node> parse_alias(LocalsHashmap &);
|
71
|
-
SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char
|
78
|
+
SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
|
72
79
|
SharedPtr<Node> parse_array(LocalsHashmap &);
|
73
80
|
SharedPtr<Node> parse_back_ref(LocalsHashmap &);
|
74
81
|
SharedPtr<Node> parse_begin_block(LocalsHashmap &);
|
@@ -96,23 +103,27 @@ private:
|
|
96
103
|
Method,
|
97
104
|
Proc,
|
98
105
|
};
|
99
|
-
void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
|
106
|
+
void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
|
107
|
+
SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
|
100
108
|
|
101
109
|
SharedPtr<Node> parse_encoding(LocalsHashmap &);
|
102
110
|
SharedPtr<Node> parse_end_block(LocalsHashmap &);
|
103
111
|
SharedPtr<Node> parse_file_constant(LocalsHashmap &);
|
112
|
+
SharedPtr<Node> parse_for(LocalsHashmap &);
|
104
113
|
SharedPtr<Node> parse_forward_args(LocalsHashmap &);
|
105
114
|
SharedPtr<Node> parse_group(LocalsHashmap &);
|
106
115
|
SharedPtr<Node> parse_hash(LocalsHashmap &);
|
107
116
|
SharedPtr<Node> parse_hash_inner(LocalsHashmap &, Precedence, Token::Type, bool, SharedPtr<Node> = {});
|
108
117
|
SharedPtr<Node> parse_identifier(LocalsHashmap &);
|
109
118
|
SharedPtr<Node> parse_if(LocalsHashmap &);
|
119
|
+
SharedPtr<Node> parse_if_branch(LocalsHashmap &, bool);
|
110
120
|
void parse_interpolated_body(LocalsHashmap &, InterpolatedNode &, Token::Type);
|
111
121
|
SharedPtr<Node> parse_interpolated_regexp(LocalsHashmap &);
|
112
122
|
int parse_regexp_options(String &);
|
113
123
|
SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
|
114
124
|
SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
|
115
125
|
SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
|
126
|
+
SharedPtr<Node> parse_line_constant(LocalsHashmap &);
|
116
127
|
SharedPtr<Node> parse_lit(LocalsHashmap &);
|
117
128
|
SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
|
118
129
|
SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
|
@@ -122,7 +133,7 @@ private:
|
|
122
133
|
SharedPtr<Node> parse_nil(LocalsHashmap &);
|
123
134
|
SharedPtr<Node> parse_not(LocalsHashmap &);
|
124
135
|
SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
|
125
|
-
void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap
|
136
|
+
void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
|
126
137
|
SharedPtr<Node> parse_redo(LocalsHashmap &);
|
127
138
|
SharedPtr<Node> parse_retry(LocalsHashmap &);
|
128
139
|
SharedPtr<Node> parse_return(LocalsHashmap &);
|
@@ -103,6 +103,7 @@ public:
|
|
103
103
|
NotKeyword,
|
104
104
|
NotMatch,
|
105
105
|
NthRef,
|
106
|
+
OperatorName,
|
106
107
|
OrKeyword,
|
107
108
|
Percent,
|
108
109
|
PercentEqual,
|
@@ -431,6 +432,8 @@ public:
|
|
431
432
|
return "!";
|
432
433
|
case Type::NthRef:
|
433
434
|
return "nth_ref";
|
435
|
+
case Type::OperatorName:
|
436
|
+
return "operator";
|
434
437
|
case Type::OrKeyword:
|
435
438
|
return "or";
|
436
439
|
case Type::Percent:
|
@@ -614,6 +617,7 @@ public:
|
|
614
617
|
case Token::Type::LessThanOrEqual:
|
615
618
|
case Token::Type::Match:
|
616
619
|
case Token::Type::Minus:
|
620
|
+
case Token::Type::Not:
|
617
621
|
case Token::Type::NotEqual:
|
618
622
|
case Token::Type::NotMatch:
|
619
623
|
case Token::Type::Percent:
|
@@ -635,6 +639,7 @@ public:
|
|
635
639
|
bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
|
636
640
|
bool is_comma() const { return m_type == Type::Comma; }
|
637
641
|
bool is_comment() const { return m_type == Type::Comment; }
|
642
|
+
bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
|
638
643
|
bool is_def_keyword() const { return m_type == Type::DefKeyword; }
|
639
644
|
bool is_doc() const { return m_type == Type::Doc; }
|
640
645
|
bool is_dot() const { return m_type == Type::Dot; }
|
@@ -743,9 +748,23 @@ public:
|
|
743
748
|
}
|
744
749
|
}
|
745
750
|
|
751
|
+
bool can_be_first_arg_of_def() const {
|
752
|
+
switch (m_type) {
|
753
|
+
case Token::Type::Ampersand:
|
754
|
+
case Token::Type::BareName:
|
755
|
+
case Token::Type::Star:
|
756
|
+
case Token::Type::StarStar:
|
757
|
+
case Token::Type::SymbolKey:
|
758
|
+
return true;
|
759
|
+
default:
|
760
|
+
return false;
|
761
|
+
}
|
762
|
+
}
|
763
|
+
|
746
764
|
bool can_be_first_arg_of_implicit_call() const {
|
747
765
|
switch (m_type) {
|
748
766
|
case Token::Type::Arrow:
|
767
|
+
case Token::Type::BackRef:
|
749
768
|
case Token::Type::BareName:
|
750
769
|
case Token::Type::BeginKeyword:
|
751
770
|
case Token::Type::Bignum:
|
@@ -800,6 +819,7 @@ public:
|
|
800
819
|
switch (m_type) {
|
801
820
|
case Token::Type::Equal:
|
802
821
|
case Token::Type::LBracket:
|
822
|
+
case Token::Type::LParen:
|
803
823
|
return true;
|
804
824
|
default:
|
805
825
|
if (is_operator())
|
@@ -808,8 +828,25 @@ public:
|
|
808
828
|
}
|
809
829
|
}
|
810
830
|
|
831
|
+
bool can_precede_regexp_literal() const {
|
832
|
+
switch (m_type) {
|
833
|
+
case Type::ElsifKeyword:
|
834
|
+
case Type::IfKeyword:
|
835
|
+
case Type::RescueKeyword:
|
836
|
+
case Type::ReturnKeyword:
|
837
|
+
case Type::UnlessKeyword:
|
838
|
+
case Type::UntilKeyword:
|
839
|
+
case Type::WhenKeyword:
|
840
|
+
case Type::WhileKeyword:
|
841
|
+
return true;
|
842
|
+
default:
|
843
|
+
return false;
|
844
|
+
}
|
845
|
+
}
|
846
|
+
|
811
847
|
bool can_precede_symbol_key() const {
|
812
848
|
switch (m_type) {
|
849
|
+
case Type::Arrow:
|
813
850
|
case Type::BareName:
|
814
851
|
case Type::Comma:
|
815
852
|
case Type::Constant:
|
@@ -818,6 +855,7 @@ public:
|
|
818
855
|
case Type::LParen:
|
819
856
|
case Type::Pipe:
|
820
857
|
case Type::PipePipe:
|
858
|
+
case Type::SuperKeyword:
|
821
859
|
return true;
|
822
860
|
default:
|
823
861
|
return false;
|
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
|
|
22
22
|
Token InterpolatedStringLexer::consume_string() {
|
23
23
|
SharedPtr<String> buf = new String;
|
24
24
|
while (auto c = current_char()) {
|
25
|
-
if (c == '\\') {
|
25
|
+
if (c == '\\' && m_stop_char != '\\') {
|
26
26
|
advance(); // backslash
|
27
27
|
auto result = consume_escaped_byte(*buf);
|
28
28
|
if (!result.first)
|
data/src/lexer/regexp_lexer.cpp
CHANGED
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
|
|
38
38
|
Token RegexpLexer::consume_regexp() {
|
39
39
|
SharedPtr<String> buf = new String;
|
40
40
|
while (auto c = current_char()) {
|
41
|
-
if (c == '\\') {
|
41
|
+
if (c == '\\' && m_stop_char != '\\') {
|
42
42
|
c = next();
|
43
43
|
switch (c) {
|
44
44
|
case '/':
|