natalie_parser 2.0.0 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +43 -0
- data/ext/natalie_parser/natalie_parser.cpp +1 -0
- data/include/natalie_parser/lexer.hpp +17 -4
- data/include/natalie_parser/node/for_node.hpp +50 -0
- data/include/natalie_parser/node/match_node.hpp +6 -0
- data/include/natalie_parser/node/node.hpp +1 -0
- data/include/natalie_parser/node.hpp +1 -0
- data/include/natalie_parser/parser.hpp +16 -5
- data/include/natalie_parser/token.hpp +38 -0
- data/lib/natalie_parser/version.rb +1 -1
- data/src/lexer/interpolated_string_lexer.cpp +1 -1
- data/src/lexer/regexp_lexer.cpp +1 -1
- data/src/lexer/word_array_lexer.cpp +1 -1
- data/src/lexer.cpp +170 -248
- data/src/node/match_node.cpp +5 -0
- data/src/parser.cpp +195 -105
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31bee2746c1e7a36eca16194e27467264dfbad5131075090e0b94c675ff150b1
|
4
|
+
data.tar.gz: 7f9a2a77aa2e34c56faeb740900779ff4682471cb02dbd51403eb1b7f72b13a7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74335871743111340267d41631f3eeae0281ea5f93bb2bb4a6536682a55f325162d428d986ba0182e14adfb27e830f9622048fb443f1561ee9611c988b07bd6b
|
7
|
+
data.tar.gz: 2fd301c60b32db6c074946adceaa54fca3736f6e64335d5bb68c9ad01f092bf19f1ce2810fb75e4641e4b6cf7ebf7e77165538282107f214fa116254b0d37ac0
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,48 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## 2.2.0 (2022-10-24)
|
4
|
+
|
5
|
+
- FIX: Allow backreference as first arg of implicit call
|
6
|
+
- FIX: Exclude `?` and `!` from class/instance/global variable name
|
7
|
+
- FIX: Fix parsing of hashes containing assignments
|
8
|
+
- FIX: Parse aliases of unary operators and methods named as keywords
|
9
|
+
- FIX: Parse alias of keyword-like method to assignment method
|
10
|
+
- FIX: Parse alias/undef of assignment methods
|
11
|
+
- FIX: Parse backtick as method name
|
12
|
+
- FIX: Parse class/module definition in dynamic class/module
|
13
|
+
- FIX: Parse `def !`
|
14
|
+
- FIX: Parse `def foo::bar`
|
15
|
+
- FIX: Parse `foo(<<FOO)`
|
16
|
+
- FIX: Parse `for ... do` and `while ... do`
|
17
|
+
- FIX: Parse global variables beginning with `$_`
|
18
|
+
- FIX: Parse keywords/operators as method names for `::`
|
19
|
+
- FIX: Parse `__LINE__` constant
|
20
|
+
- FIX: Parse more percent strings
|
21
|
+
- FIX: Parse more regexes
|
22
|
+
- FIX: Parse more stabby procs without parentheses
|
23
|
+
- FIX: Parse multiple parameters set to the same default value
|
24
|
+
- FIX: Parse parentheses-less stabby proc with keyword arg
|
25
|
+
- FIX: Parse undef of methods with special names
|
26
|
+
- FIX: Parse `unless ... then`
|
27
|
+
- FIX: Parse variable names starting with extended ASCII characters
|
28
|
+
- FIX: Raise SyntaxError for `def ====`
|
29
|
+
- FIX: Raise syntax error for `def +@.foo`
|
30
|
+
- FIX: Tokenize unterminated ternary at EOF
|
31
|
+
- FIX: Use lower precedence for block pass
|
32
|
+
|
33
|
+
## 2.1.0 (2022-08-12)
|
34
|
+
|
35
|
+
- FEAT: Parse for loops
|
36
|
+
- FIX: Fix bug parsing defined? with parens
|
37
|
+
- FIX: Fix parsing of keyword splat next to other keyword args
|
38
|
+
- FIX: Parse block pass after bare/implicit hash
|
39
|
+
- FIX: Parse if statements with match conditions
|
40
|
+
- FIX: Parse regexps with leading space preceded by keywords
|
41
|
+
- FIX: Parse symbol key after super keyword
|
42
|
+
- FIX: Parse unless statements with match conditions
|
43
|
+
- FIX: Parse while/until statements with match conditions
|
44
|
+
- FIX: Reset block association level inside array
|
45
|
+
|
3
46
|
## 2.0.0 (2022-06-24)
|
4
47
|
|
5
48
|
- FEAT: Differentiate between bare/implicit hash and explicit one
|
@@ -80,6 +80,7 @@ VALUE token_to_ruby(NatalieParser::Token token, bool include_location_info) {
|
|
80
80
|
case NatalieParser::Token::Type::Constant:
|
81
81
|
case NatalieParser::Token::Type::GlobalVariable:
|
82
82
|
case NatalieParser::Token::Type::InstanceVariable:
|
83
|
+
case NatalieParser::Token::Type::OperatorName:
|
83
84
|
case NatalieParser::Token::Type::Symbol:
|
84
85
|
case NatalieParser::Token::Type::SymbolKey: {
|
85
86
|
auto literal = token.literal_string();
|
@@ -70,9 +70,9 @@ protected:
|
|
70
70
|
virtual bool skip_whitespace();
|
71
71
|
virtual Token build_next_token();
|
72
72
|
Token consume_symbol();
|
73
|
+
SharedPtr<String> consume_word();
|
73
74
|
Token consume_word(Token::Type type);
|
74
|
-
Token
|
75
|
-
Token consume_constant();
|
75
|
+
Token consume_bare_name_or_constant(Token::Type type);
|
76
76
|
Token consume_global_variable();
|
77
77
|
Token consume_heredoc();
|
78
78
|
Token consume_numeric();
|
@@ -80,11 +80,19 @@ protected:
|
|
80
80
|
Token consume_nth_ref();
|
81
81
|
long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
|
82
82
|
long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
|
83
|
-
Token consume_double_quoted_string(char, char, Token::Type begin_type
|
83
|
+
Token consume_double_quoted_string(char, char, Token::Type begin_type, Token::Type end_type);
|
84
84
|
Token consume_single_quoted_string(char, char);
|
85
85
|
Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
|
86
86
|
Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
|
87
87
|
Token consume_regexp(char start_char, char stop_char);
|
88
|
+
Token consume_percent_symbol(char start_char, char stop_char);
|
89
|
+
Token consume_interpolated_string(char start_char, char stop_char);
|
90
|
+
Token consume_interpolated_shell(char start_char, char stop_char);
|
91
|
+
Token consume_percent_lower_w(char start_char, char stop_char);
|
92
|
+
Token consume_percent_upper_w(char start_char, char stop_char);
|
93
|
+
Token consume_percent_lower_i(char start_char, char stop_char);
|
94
|
+
Token consume_percent_upper_i(char start_char, char stop_char);
|
95
|
+
Token consume_percent_string(Token (Lexer::*consumer)(char start_char, char stop_char), bool is_lettered = true);
|
88
96
|
SharedPtr<String> consume_non_whitespace();
|
89
97
|
|
90
98
|
void utf32_codepoint_to_utf8(String &buf, long long codepoint);
|
@@ -95,7 +103,7 @@ protected:
|
|
95
103
|
bool token_is_first_on_line() const;
|
96
104
|
|
97
105
|
bool char_can_be_string_or_regexp_delimiter(char c) const {
|
98
|
-
return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
|
106
|
+
return (c >= '!' && c <= '/') || c == ':' || c == ';' || c == '=' || c == '?' || c == '@' || c == '\\' || c == '~' || c == '|' || (c >= '^' && c <= '`');
|
99
107
|
}
|
100
108
|
|
101
109
|
SharedPtr<String> m_input;
|
@@ -131,5 +139,10 @@ protected:
|
|
131
139
|
// then increment m_pair_depth
|
132
140
|
char m_start_char { 0 };
|
133
141
|
int m_pair_depth { 0 };
|
142
|
+
|
143
|
+
size_t m_remaining_method_names { 0 };
|
144
|
+
bool m_allow_assignment_method { false };
|
145
|
+
Token::Type m_method_name_separator { Token::Type::Invalid };
|
146
|
+
Token m_last_method_name {};
|
134
147
|
};
|
135
148
|
}
|
@@ -0,0 +1,50 @@
|
|
1
|
+
#pragma once
|
2
|
+
|
3
|
+
#include "natalie_parser/node/block_node.hpp"
|
4
|
+
#include "natalie_parser/node/node.hpp"
|
5
|
+
#include "natalie_parser/node/node_with_args.hpp"
|
6
|
+
|
7
|
+
namespace NatalieParser {
|
8
|
+
|
9
|
+
using namespace TM;
|
10
|
+
|
11
|
+
class ForNode : public Node {
|
12
|
+
public:
|
13
|
+
ForNode(const Token &token, SharedPtr<Node> expr, SharedPtr<Node> vars, SharedPtr<BlockNode> body)
|
14
|
+
: Node { token }
|
15
|
+
, m_expr { expr }
|
16
|
+
, m_vars { vars }
|
17
|
+
, m_body { body } {
|
18
|
+
assert(m_expr);
|
19
|
+
assert(m_vars);
|
20
|
+
}
|
21
|
+
|
22
|
+
virtual Type type() const override { return Type::For; }
|
23
|
+
|
24
|
+
const SharedPtr<Node> expr() const { return m_expr; }
|
25
|
+
const SharedPtr<Node> vars() const { return m_vars; }
|
26
|
+
const SharedPtr<BlockNode> body() const { return m_body; }
|
27
|
+
|
28
|
+
virtual void transform(Creator *creator) const override {
|
29
|
+
creator->set_type("for");
|
30
|
+
creator->append(m_expr);
|
31
|
+
switch (m_vars->type()) {
|
32
|
+
case Node::Type::Identifier:
|
33
|
+
creator->with_assignment(true, [&]() { creator->append(*m_vars); });
|
34
|
+
break;
|
35
|
+
case Node::Type::MultipleAssignment:
|
36
|
+
creator->append(m_vars);
|
37
|
+
break;
|
38
|
+
default:
|
39
|
+
TM_UNREACHABLE();
|
40
|
+
}
|
41
|
+
if (!m_body->is_empty())
|
42
|
+
creator->append(m_body->without_unnecessary_nesting());
|
43
|
+
}
|
44
|
+
|
45
|
+
protected:
|
46
|
+
SharedPtr<Node> m_expr {};
|
47
|
+
SharedPtr<Node> m_vars {};
|
48
|
+
SharedPtr<BlockNode> m_body {};
|
49
|
+
};
|
50
|
+
}
|
@@ -13,6 +13,12 @@ using namespace TM;
|
|
13
13
|
|
14
14
|
class MatchNode : public Node {
|
15
15
|
public:
|
16
|
+
MatchNode(const Token &token, SharedPtr<RegexpNode> regexp)
|
17
|
+
: Node { token }
|
18
|
+
, m_regexp { regexp } {
|
19
|
+
assert(m_regexp);
|
20
|
+
}
|
21
|
+
|
16
22
|
MatchNode(const Token &token, SharedPtr<RegexpNode> regexp, SharedPtr<Node> arg, bool regexp_on_left)
|
17
23
|
: Node { token }
|
18
24
|
, m_regexp { regexp }
|
@@ -31,6 +31,7 @@
|
|
31
31
|
#include "natalie_parser/node/false_node.hpp"
|
32
32
|
#include "natalie_parser/node/fixnum_node.hpp"
|
33
33
|
#include "natalie_parser/node/float_node.hpp"
|
34
|
+
#include "natalie_parser/node/for_node.hpp"
|
34
35
|
#include "natalie_parser/node/forward_args_node.hpp"
|
35
36
|
#include "natalie_parser/node/hash_node.hpp"
|
36
37
|
#include "natalie_parser/node/hash_pattern_node.hpp"
|
@@ -49,16 +49,22 @@ public:
|
|
49
49
|
|
50
50
|
enum class Precedence;
|
51
51
|
|
52
|
+
enum class IterAllow {
|
53
|
+
NONE,
|
54
|
+
CURLY_ONLY,
|
55
|
+
CURLY_AND_BLOCK,
|
56
|
+
};
|
57
|
+
|
52
58
|
SharedPtr<Node> tree();
|
53
59
|
|
54
60
|
private:
|
55
|
-
bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
|
61
|
+
bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence, IterAllow iter_allow);
|
56
62
|
|
57
63
|
Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
|
58
64
|
|
59
65
|
bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
|
60
66
|
|
61
|
-
SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
|
67
|
+
SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &, IterAllow = IterAllow::CURLY_AND_BLOCK);
|
62
68
|
|
63
69
|
SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
|
64
70
|
SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
|
@@ -67,8 +73,9 @@ private:
|
|
67
73
|
SharedPtr<Node> parse_if_body(LocalsHashmap &);
|
68
74
|
SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
|
69
75
|
|
76
|
+
void reinsert_collapsed_newline();
|
70
77
|
SharedPtr<Node> parse_alias(LocalsHashmap &);
|
71
|
-
SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char
|
78
|
+
SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *);
|
72
79
|
SharedPtr<Node> parse_array(LocalsHashmap &);
|
73
80
|
SharedPtr<Node> parse_back_ref(LocalsHashmap &);
|
74
81
|
SharedPtr<Node> parse_begin_block(LocalsHashmap &);
|
@@ -96,23 +103,27 @@ private:
|
|
96
103
|
Method,
|
97
104
|
Proc,
|
98
105
|
};
|
99
|
-
void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext);
|
106
|
+
void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &, ArgsContext, IterAllow = IterAllow::CURLY_AND_BLOCK);
|
107
|
+
SharedPtr<Node> parse_arg_default_value(LocalsHashmap &, IterAllow);
|
100
108
|
|
101
109
|
SharedPtr<Node> parse_encoding(LocalsHashmap &);
|
102
110
|
SharedPtr<Node> parse_end_block(LocalsHashmap &);
|
103
111
|
SharedPtr<Node> parse_file_constant(LocalsHashmap &);
|
112
|
+
SharedPtr<Node> parse_for(LocalsHashmap &);
|
104
113
|
SharedPtr<Node> parse_forward_args(LocalsHashmap &);
|
105
114
|
SharedPtr<Node> parse_group(LocalsHashmap &);
|
106
115
|
SharedPtr<Node> parse_hash(LocalsHashmap &);
|
107
116
|
SharedPtr<Node> parse_hash_inner(LocalsHashmap &, Precedence, Token::Type, bool, SharedPtr<Node> = {});
|
108
117
|
SharedPtr<Node> parse_identifier(LocalsHashmap &);
|
109
118
|
SharedPtr<Node> parse_if(LocalsHashmap &);
|
119
|
+
SharedPtr<Node> parse_if_branch(LocalsHashmap &, bool);
|
110
120
|
void parse_interpolated_body(LocalsHashmap &, InterpolatedNode &, Token::Type);
|
111
121
|
SharedPtr<Node> parse_interpolated_regexp(LocalsHashmap &);
|
112
122
|
int parse_regexp_options(String &);
|
113
123
|
SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
|
114
124
|
SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
|
115
125
|
SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
|
126
|
+
SharedPtr<Node> parse_line_constant(LocalsHashmap &);
|
116
127
|
SharedPtr<Node> parse_lit(LocalsHashmap &);
|
117
128
|
SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
|
118
129
|
SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
|
@@ -122,7 +133,7 @@ private:
|
|
122
133
|
SharedPtr<Node> parse_nil(LocalsHashmap &);
|
123
134
|
SharedPtr<Node> parse_not(LocalsHashmap &);
|
124
135
|
SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
|
125
|
-
void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
|
136
|
+
void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &, IterAllow);
|
126
137
|
SharedPtr<Node> parse_redo(LocalsHashmap &);
|
127
138
|
SharedPtr<Node> parse_retry(LocalsHashmap &);
|
128
139
|
SharedPtr<Node> parse_return(LocalsHashmap &);
|
@@ -103,6 +103,7 @@ public:
|
|
103
103
|
NotKeyword,
|
104
104
|
NotMatch,
|
105
105
|
NthRef,
|
106
|
+
OperatorName,
|
106
107
|
OrKeyword,
|
107
108
|
Percent,
|
108
109
|
PercentEqual,
|
@@ -431,6 +432,8 @@ public:
|
|
431
432
|
return "!";
|
432
433
|
case Type::NthRef:
|
433
434
|
return "nth_ref";
|
435
|
+
case Type::OperatorName:
|
436
|
+
return "operator";
|
434
437
|
case Type::OrKeyword:
|
435
438
|
return "or";
|
436
439
|
case Type::Percent:
|
@@ -614,6 +617,7 @@ public:
|
|
614
617
|
case Token::Type::LessThanOrEqual:
|
615
618
|
case Token::Type::Match:
|
616
619
|
case Token::Type::Minus:
|
620
|
+
case Token::Type::Not:
|
617
621
|
case Token::Type::NotEqual:
|
618
622
|
case Token::Type::NotMatch:
|
619
623
|
case Token::Type::Percent:
|
@@ -635,6 +639,7 @@ public:
|
|
635
639
|
bool is_closing_token() const { return m_type == Type::RBracket || m_type == Type::RCurlyBrace || m_type == Type::RParen; }
|
636
640
|
bool is_comma() const { return m_type == Type::Comma; }
|
637
641
|
bool is_comment() const { return m_type == Type::Comment; }
|
642
|
+
bool is_constant_resolution() const { return m_type == Type::ConstantResolution; }
|
638
643
|
bool is_def_keyword() const { return m_type == Type::DefKeyword; }
|
639
644
|
bool is_doc() const { return m_type == Type::Doc; }
|
640
645
|
bool is_dot() const { return m_type == Type::Dot; }
|
@@ -743,9 +748,23 @@ public:
|
|
743
748
|
}
|
744
749
|
}
|
745
750
|
|
751
|
+
bool can_be_first_arg_of_def() const {
|
752
|
+
switch (m_type) {
|
753
|
+
case Token::Type::Ampersand:
|
754
|
+
case Token::Type::BareName:
|
755
|
+
case Token::Type::Star:
|
756
|
+
case Token::Type::StarStar:
|
757
|
+
case Token::Type::SymbolKey:
|
758
|
+
return true;
|
759
|
+
default:
|
760
|
+
return false;
|
761
|
+
}
|
762
|
+
}
|
763
|
+
|
746
764
|
bool can_be_first_arg_of_implicit_call() const {
|
747
765
|
switch (m_type) {
|
748
766
|
case Token::Type::Arrow:
|
767
|
+
case Token::Type::BackRef:
|
749
768
|
case Token::Type::BareName:
|
750
769
|
case Token::Type::BeginKeyword:
|
751
770
|
case Token::Type::Bignum:
|
@@ -800,6 +819,7 @@ public:
|
|
800
819
|
switch (m_type) {
|
801
820
|
case Token::Type::Equal:
|
802
821
|
case Token::Type::LBracket:
|
822
|
+
case Token::Type::LParen:
|
803
823
|
return true;
|
804
824
|
default:
|
805
825
|
if (is_operator())
|
@@ -808,8 +828,25 @@ public:
|
|
808
828
|
}
|
809
829
|
}
|
810
830
|
|
831
|
+
bool can_precede_regexp_literal() const {
|
832
|
+
switch (m_type) {
|
833
|
+
case Type::ElsifKeyword:
|
834
|
+
case Type::IfKeyword:
|
835
|
+
case Type::RescueKeyword:
|
836
|
+
case Type::ReturnKeyword:
|
837
|
+
case Type::UnlessKeyword:
|
838
|
+
case Type::UntilKeyword:
|
839
|
+
case Type::WhenKeyword:
|
840
|
+
case Type::WhileKeyword:
|
841
|
+
return true;
|
842
|
+
default:
|
843
|
+
return false;
|
844
|
+
}
|
845
|
+
}
|
846
|
+
|
811
847
|
bool can_precede_symbol_key() const {
|
812
848
|
switch (m_type) {
|
849
|
+
case Type::Arrow:
|
813
850
|
case Type::BareName:
|
814
851
|
case Type::Comma:
|
815
852
|
case Type::Constant:
|
@@ -818,6 +855,7 @@ public:
|
|
818
855
|
case Type::LParen:
|
819
856
|
case Type::Pipe:
|
820
857
|
case Type::PipePipe:
|
858
|
+
case Type::SuperKeyword:
|
821
859
|
return true;
|
822
860
|
default:
|
823
861
|
return false;
|
@@ -22,7 +22,7 @@ Token InterpolatedStringLexer::build_next_token() {
|
|
22
22
|
Token InterpolatedStringLexer::consume_string() {
|
23
23
|
SharedPtr<String> buf = new String;
|
24
24
|
while (auto c = current_char()) {
|
25
|
-
if (c == '\\') {
|
25
|
+
if (c == '\\' && m_stop_char != '\\') {
|
26
26
|
advance(); // backslash
|
27
27
|
auto result = consume_escaped_byte(*buf);
|
28
28
|
if (!result.first)
|
data/src/lexer/regexp_lexer.cpp
CHANGED
@@ -38,7 +38,7 @@ Token RegexpLexer::build_next_token() {
|
|
38
38
|
Token RegexpLexer::consume_regexp() {
|
39
39
|
SharedPtr<String> buf = new String;
|
40
40
|
while (auto c = current_char()) {
|
41
|
-
if (c == '\\') {
|
41
|
+
if (c == '\\' && m_stop_char != '\\') {
|
42
42
|
c = next();
|
43
43
|
switch (c) {
|
44
44
|
case '/':
|