natalie_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
@@ -0,0 +1,218 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/lexer.hpp"
4
+ #include "natalie_parser/node.hpp"
5
+ #include "natalie_parser/token.hpp"
6
+ #include "tm/string.hpp"
7
+
8
+ namespace NatalieParser {
9
+
10
+ using namespace TM;
11
+
12
+ class Parser {
13
+ public:
14
+ class SyntaxError {
15
+ public:
16
+ SyntaxError(const char *message)
17
+ : m_message { strdup(message) } {
18
+ assert(m_message);
19
+ }
20
+
21
+ SyntaxError(const String &message)
22
+ : SyntaxError { message.c_str() } { }
23
+
24
+ ~SyntaxError() {
25
+ free(m_message);
26
+ }
27
+
28
+ SyntaxError(const SyntaxError &) = delete;
29
+ SyntaxError &operator=(const SyntaxError &) = delete;
30
+
31
+ const char *message() { return m_message; }
32
+
33
+ private:
34
+ char *m_message { nullptr };
35
+ };
36
+
37
+ Parser(SharedPtr<String> code, SharedPtr<String> file)
38
+ : m_code { code }
39
+ , m_file { file } {
40
+ m_tokens = Lexer { m_code, m_file }.tokens();
41
+ m_call_depth.push(0);
42
+ }
43
+
44
+ ~Parser() {
45
+ // SharedPtr ftw
46
+ }
47
+
48
+ using LocalsHashmap = TM::Hashmap<TM::String>;
49
+
50
+ enum class Precedence;
51
+
52
+ SharedPtr<Node> tree();
53
+
54
+ private:
55
+ bool higher_precedence(Token &token, SharedPtr<Node> left, Precedence current_precedence);
56
+
57
+ Precedence get_precedence(Token &token, SharedPtr<Node> left = {});
58
+
59
+ bool is_first_arg_of_call_without_parens(SharedPtr<Node>, Token &);
60
+
61
+ SharedPtr<Node> parse_expression(Precedence, LocalsHashmap &);
62
+
63
+ SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, std::function<bool(Token::Type)>, bool = false);
64
+ SharedPtr<BlockNode> parse_body(LocalsHashmap &, Precedence, Token::Type = Token::Type::EndKeyword, bool = false);
65
+ SharedPtr<BlockNode> parse_case_in_body(LocalsHashmap &);
66
+ SharedPtr<BlockNode> parse_case_when_body(LocalsHashmap &);
67
+ SharedPtr<Node> parse_if_body(LocalsHashmap &);
68
+ SharedPtr<BlockNode> parse_def_body(LocalsHashmap &);
69
+
70
+ SharedPtr<Node> parse_alias(LocalsHashmap &);
71
+ SharedPtr<SymbolNode> parse_alias_arg(LocalsHashmap &, const char *, bool);
72
+ SharedPtr<Node> parse_array(LocalsHashmap &);
73
+ SharedPtr<Node> parse_back_ref(LocalsHashmap &);
74
+ SharedPtr<Node> parse_begin_block(LocalsHashmap &);
75
+ SharedPtr<Node> parse_begin(LocalsHashmap &);
76
+ void parse_rest_of_begin(BeginNode &, LocalsHashmap &);
77
+ SharedPtr<Node> parse_beginless_range(LocalsHashmap &);
78
+ SharedPtr<Node> parse_block_pass(LocalsHashmap &);
79
+ SharedPtr<Node> parse_bool(LocalsHashmap &);
80
+ SharedPtr<Node> parse_break(LocalsHashmap &);
81
+ SharedPtr<Node> parse_class(LocalsHashmap &);
82
+ SharedPtr<Node> parse_class_or_module_name(LocalsHashmap &);
83
+ SharedPtr<Node> parse_case(LocalsHashmap &);
84
+ SharedPtr<Node> parse_case_in_pattern(LocalsHashmap &);
85
+ SharedPtr<Node> parse_case_in_patterns(LocalsHashmap &);
86
+ void parse_comma_separated_expressions(ArrayNode &, LocalsHashmap &);
87
+ SharedPtr<Node> parse_constant(LocalsHashmap &);
88
+ SharedPtr<Node> parse_def(LocalsHashmap &);
89
+ SharedPtr<Node> parse_defined(LocalsHashmap &);
90
+ void parse_def_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
91
+ void parse_def_single_arg(Vector<SharedPtr<Node>> &, LocalsHashmap &);
92
+ SharedPtr<Node> parse_encoding(LocalsHashmap &);
93
+ SharedPtr<Node> parse_end_block(LocalsHashmap &);
94
+ SharedPtr<Node> parse_file_constant(LocalsHashmap &);
95
+ SharedPtr<Node> parse_group(LocalsHashmap &);
96
+ SharedPtr<Node> parse_hash(LocalsHashmap &);
97
+ SharedPtr<Node> parse_hash_inner(LocalsHashmap &, Precedence, Token::Type, SharedPtr<Node> = {});
98
+ SharedPtr<Node> parse_identifier(LocalsHashmap &);
99
+ SharedPtr<Node> parse_if(LocalsHashmap &);
100
+ void parse_interpolated_body(LocalsHashmap &, InterpolatedNode &, Token::Type);
101
+ SharedPtr<Node> parse_interpolated_regexp(LocalsHashmap &);
102
+ int parse_regexp_options(String &);
103
+ SharedPtr<Node> parse_interpolated_shell(LocalsHashmap &);
104
+ SharedPtr<Node> parse_interpolated_string(LocalsHashmap &);
105
+ SharedPtr<Node> parse_interpolated_symbol(LocalsHashmap &);
106
+ SharedPtr<Node> parse_lit(LocalsHashmap &);
107
+ SharedPtr<Node> parse_keyword_splat(LocalsHashmap &);
108
+ SharedPtr<Node> parse_keyword_splat_wrapped_in_hash(LocalsHashmap &);
109
+ SharedPtr<String> parse_method_name(LocalsHashmap &);
110
+ SharedPtr<Node> parse_module(LocalsHashmap &);
111
+ SharedPtr<Node> parse_next(LocalsHashmap &);
112
+ SharedPtr<Node> parse_nil(LocalsHashmap &);
113
+ SharedPtr<Node> parse_not(LocalsHashmap &);
114
+ SharedPtr<Node> parse_nth_ref(LocalsHashmap &);
115
+ void parse_proc_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
116
+ SharedPtr<Node> parse_redo(LocalsHashmap &);
117
+ SharedPtr<Node> parse_retry(LocalsHashmap &);
118
+ SharedPtr<Node> parse_return(LocalsHashmap &);
119
+ SharedPtr<Node> parse_sclass(LocalsHashmap &);
120
+ SharedPtr<Node> parse_self(LocalsHashmap &);
121
+ void parse_shadow_variables_in_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
122
+ SharedPtr<String> parse_shadow_variable_single_arg();
123
+ SharedPtr<Node> parse_splat(LocalsHashmap &);
124
+ SharedPtr<Node> parse_stabby_proc(LocalsHashmap &);
125
+ SharedPtr<Node> parse_string(LocalsHashmap &);
126
+ SharedPtr<Node> parse_super(LocalsHashmap &);
127
+ SharedPtr<Node> parse_symbol(LocalsHashmap &);
128
+ SharedPtr<Node> parse_symbol_key(LocalsHashmap &);
129
+ SharedPtr<Node> parse_statement_keyword(LocalsHashmap &);
130
+ SharedPtr<Node> parse_top_level_constant(LocalsHashmap &);
131
+ SharedPtr<Node> parse_unary_operator(LocalsHashmap &);
132
+ SharedPtr<Node> parse_undef(LocalsHashmap &);
133
+ SharedPtr<Node> parse_unless(LocalsHashmap &);
134
+ SharedPtr<Node> parse_while(LocalsHashmap &);
135
+ SharedPtr<Node> parse_word_array(LocalsHashmap &);
136
+ SharedPtr<Node> parse_word_symbol_array(LocalsHashmap &);
137
+ SharedPtr<Node> parse_yield(LocalsHashmap &);
138
+
139
+ SharedPtr<Node> parse_assignment_expression(SharedPtr<Node>, LocalsHashmap &);
140
+ SharedPtr<Node> parse_assignment_expression_without_multiple_values(SharedPtr<Node>, LocalsHashmap &);
141
+ SharedPtr<Node> parse_assignment_expression(SharedPtr<Node>, LocalsHashmap &, bool);
142
+ SharedPtr<Node> parse_assignment_expression_value(bool, LocalsHashmap &, bool);
143
+ SharedPtr<Node> parse_assignment_identifier(bool, LocalsHashmap &);
144
+ SharedPtr<Node> parse_call_expression_without_parens(SharedPtr<Node>, LocalsHashmap &);
145
+ SharedPtr<Node> parse_call_expression_with_parens(SharedPtr<Node>, LocalsHashmap &);
146
+ SharedPtr<Node> parse_call_hash_args(LocalsHashmap &, bool, Token::Type, SharedPtr<Node>);
147
+ SharedPtr<Node> parse_constant_resolution_expression(SharedPtr<Node>, LocalsHashmap &);
148
+ SharedPtr<Node> parse_infix_expression(SharedPtr<Node>, LocalsHashmap &);
149
+ SharedPtr<Node> parse_proc_call_expression(SharedPtr<Node>, LocalsHashmap &);
150
+ SharedPtr<Node> parse_iter_expression(SharedPtr<Node>, LocalsHashmap &);
151
+ SharedPtr<BlockNode> parse_iter_body(LocalsHashmap &, bool);
152
+ SharedPtr<Node> parse_logical_expression(SharedPtr<Node>, LocalsHashmap &);
153
+ SharedPtr<Node> parse_match_expression(SharedPtr<Node>, LocalsHashmap &);
154
+ SharedPtr<Node> parse_modifier_expression(SharedPtr<Node>, LocalsHashmap &);
155
+ SharedPtr<Node> parse_multiple_assignment_expression(SharedPtr<Node>, LocalsHashmap &);
156
+ SharedPtr<Node> parse_not_match_expression(SharedPtr<Node>, LocalsHashmap &);
157
+ SharedPtr<Node> parse_op_assign_expression(SharedPtr<Node>, LocalsHashmap &);
158
+ SharedPtr<Node> parse_op_attr_assign_expression(SharedPtr<Node>, LocalsHashmap &);
159
+ SharedPtr<Node> parse_range_expression(SharedPtr<Node>, LocalsHashmap &);
160
+ SharedPtr<Node> parse_ref_expression(SharedPtr<Node>, LocalsHashmap &);
161
+ SharedPtr<Node> parse_rescue_expression(SharedPtr<Node>, LocalsHashmap &);
162
+ SharedPtr<Node> parse_safe_send_expression(SharedPtr<Node>, LocalsHashmap &);
163
+ SharedPtr<Node> parse_send_expression(SharedPtr<Node>, LocalsHashmap &);
164
+ SharedPtr<Node> parse_ternary_expression(SharedPtr<Node>, LocalsHashmap &);
165
+
166
+ void parse_call_args(NodeWithArgs &, LocalsHashmap &, bool = false, Token::Type = Token::Type::RParen);
167
+ void parse_iter_args(Vector<SharedPtr<Node>> &, LocalsHashmap &);
168
+
169
+ using parse_null_fn = SharedPtr<Node> (Parser::*)(LocalsHashmap &);
170
+ using parse_left_fn = SharedPtr<Node> (Parser::*)(SharedPtr<Node>, LocalsHashmap &);
171
+
172
+ parse_null_fn null_denotation(Token::Type);
173
+ parse_left_fn left_denotation(Token &, SharedPtr<Node>, Precedence);
174
+
175
+ bool treat_left_bracket_as_element_reference(SharedPtr<Node> left, Token &token) {
176
+ return !token.whitespace_precedes() || (left->type() == Node::Type::Identifier && left.static_cast_as<IdentifierNode>()->is_lvar());
177
+ }
178
+
179
+ // convert ((x and y) and z) to (x and (y and z))
180
+ template <typename T>
181
+ SharedPtr<Node> regroup(Token &token, SharedPtr<Node> left, SharedPtr<Node> right) {
182
+ auto left_node = left.static_cast_as<T>();
183
+ return new T { left_node->token(), left_node->left(), new T { token, left_node->right(), right } };
184
+ };
185
+
186
+ SharedPtr<Node> append_string_nodes(SharedPtr<Node> string1, SharedPtr<Node> string2);
187
+ SharedPtr<Node> concat_adjacent_strings(SharedPtr<Node> string, LocalsHashmap &locals, bool &strings_were_appended);
188
+
189
+ SharedPtr<NodeWithArgs> to_node_with_args(SharedPtr<Node> node);
190
+
191
+ Token &current_token() const;
192
+ Token &peek_token() const;
193
+
194
+ void next_expression();
195
+ void skip_newlines();
196
+
197
+ void expect(Token::Type, const char *);
198
+ [[noreturn]] void throw_unexpected(const Token &, const char *, const char * = nullptr);
199
+ [[noreturn]] void throw_unexpected(const char *);
200
+ [[noreturn]] void throw_unterminated_thing(Token, Token = {});
201
+
202
+ void advance() { m_index++; }
203
+ void rewind() { m_index--; }
204
+
205
+ String code_line(size_t number);
206
+ String current_line();
207
+
208
+ void validate_current_token();
209
+
210
+ SharedPtr<String> m_code;
211
+ SharedPtr<String> m_file;
212
+ size_t m_index { 0 };
213
+ SharedPtr<Vector<Token>> m_tokens {};
214
+
215
+ Vector<Precedence> m_precedence_stack {};
216
+ Vector<unsigned int> m_call_depth {};
217
+ };
218
+ }