natalie_parser 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
@@ -0,0 +1,113 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/creator.hpp"
4
+ #include "natalie_parser/node.hpp"
5
+
6
+ namespace NatalieParser {
7
+
8
+ class DebugCreator : public Creator {
9
+ public:
10
+ virtual ~DebugCreator() { }
11
+
12
+ virtual void set_comments(const TM::String &) override {
13
+ // ignore for now
14
+ }
15
+
16
+ virtual void set_type(const char *type) override {
17
+ if (m_nodes.size() >= 1)
18
+ m_nodes[0] = String::format(":{}", type);
19
+ else
20
+ m_nodes.push_front(String::format(":{}", type));
21
+ }
22
+
23
+ virtual void append(const Node &node) override {
24
+ if (node.type() == Node::Type::Nil) {
25
+ m_nodes.push("nil");
26
+ return;
27
+ }
28
+ DebugCreator creator;
29
+ creator.set_assignment(assignment());
30
+ node.transform(&creator);
31
+ m_nodes.push(creator.to_string());
32
+ }
33
+
34
+ virtual void append_array(const ArrayNode &array) override {
35
+ DebugCreator creator;
36
+ creator.set_assignment(assignment());
37
+ array.ArrayNode::transform(&creator);
38
+ m_nodes.push(creator.to_string());
39
+ }
40
+
41
+ virtual void append_false() override {
42
+ m_nodes.push("false");
43
+ }
44
+
45
+ virtual void append_float(double number) override {
46
+ m_nodes.push(String(number));
47
+ }
48
+
49
+ virtual void append_integer(long long number) override {
50
+ m_nodes.push(String(number));
51
+ }
52
+
53
+ virtual void append_integer(TM::String &number) override {
54
+ m_nodes.push(String(number));
55
+ }
56
+
57
+ virtual void append_nil() override {
58
+ m_nodes.push("nil");
59
+ }
60
+
61
+ virtual void append_range(long long first, long long last, bool exclude_end) override {
62
+ m_nodes.push(String(first));
63
+ m_nodes.push(exclude_end ? "..." : "..");
64
+ m_nodes.push(String(last));
65
+ }
66
+
67
+ virtual void append_regexp(TM::String &pattern, int options) override {
68
+ TM_UNUSED(options);
69
+ m_nodes.push('/');
70
+ m_nodes.push(pattern);
71
+ m_nodes.push('/');
72
+ }
73
+
74
+ virtual void append_sexp(std::function<void(Creator *)> fn) override {
75
+ DebugCreator creator;
76
+ fn(&creator);
77
+ m_nodes.push(creator.to_string());
78
+ }
79
+
80
+ virtual void append_string(TM::String &string) override {
81
+ m_nodes.push(String::format("\"{}\"", string));
82
+ }
83
+
84
+ virtual void append_symbol(TM::String &name) override {
85
+ m_nodes.push(String::format(":{}", name));
86
+ }
87
+
88
+ virtual void append_true() override {
89
+ m_nodes.push("true");
90
+ }
91
+
92
+ virtual void wrap(const char *type) override {
93
+ auto inner = to_string();
94
+ m_nodes.clear();
95
+ set_type(type);
96
+ m_nodes.push(inner);
97
+ }
98
+
99
+ TM::String to_string() {
100
+ TM::String buf = "(";
101
+ for (size_t i = 0; i < m_nodes.size(); ++i) {
102
+ buf.append(m_nodes[i]);
103
+ if (i + 1 < m_nodes.size())
104
+ buf.append(", ");
105
+ }
106
+ buf.append_char(')');
107
+ return buf;
108
+ }
109
+
110
+ private:
111
+ TM::Vector<TM::String> m_nodes {};
112
+ };
113
+ }
@@ -0,0 +1,108 @@
1
+ #pragma once
2
+
3
+ #include "tm/shared_ptr.hpp"
4
+ #include "tm/string.hpp"
5
+ #include "tm/vector.hpp"
6
+ #include <functional>
7
+ #include <initializer_list>
8
+
9
+ namespace NatalieParser {
10
+
11
+ class Node;
12
+ class ArrayNode;
13
+
14
+ class Creator {
15
+ public:
16
+ Creator() { }
17
+
18
+ Creator(TM::SharedPtr<const TM::String> file, size_t line, size_t column)
19
+ : m_file { file }
20
+ , m_line { line }
21
+ , m_column { column } { }
22
+
23
+ virtual void set_comments(const TM::String &comments) = 0;
24
+ virtual void set_type(const char *type) = 0;
25
+ virtual void append(const TM::SharedPtr<Node> node) { append(*node); }
26
+ virtual void append(const Node &node) = 0;
27
+ virtual void append_array(const TM::SharedPtr<ArrayNode> array) { append_array(*array); }
28
+ virtual void append_array(const ArrayNode &array) = 0;
29
+ virtual void append_false() = 0;
30
+ virtual void append_float(double number) = 0;
31
+ virtual void append_integer(long long number) = 0;
32
+ virtual void append_integer(TM::String &number) = 0;
33
+ virtual void append_nil() = 0;
34
+ virtual void append_range(long long first, long long last, bool exclude_end) = 0;
35
+ virtual void append_regexp(TM::String &pattern, int options) = 0;
36
+ virtual void append_sexp(std::function<void(Creator *)> fn) = 0;
37
+ virtual void append_string(TM::String &string) = 0;
38
+ virtual void append_symbol(TM::String &symbol) = 0;
39
+ virtual void append_true() = 0;
40
+ virtual void wrap(const char *type) = 0;
41
+
42
+ virtual ~Creator() { }
43
+
44
+ void append_nil_sexp() {
45
+ append_sexp([&](Creator *c) { c->set_type("nil"); });
46
+ }
47
+
48
+ void append_regexp(TM::SharedPtr<TM::String> pattern_ptr, int options) {
49
+ if (!pattern_ptr) {
50
+ auto p = TM::String("");
51
+ append_regexp(p, options);
52
+ }
53
+ append_regexp(*pattern_ptr, options);
54
+ }
55
+
56
+ void append_string(const char *string) {
57
+ auto s = TM::String(string);
58
+ append_string(s);
59
+ }
60
+
61
+ void append_string(TM::SharedPtr<TM::String> string_ptr) {
62
+ if (!string_ptr) {
63
+ auto s = TM::String("");
64
+ append_string(s);
65
+ }
66
+ append_string(*string_ptr);
67
+ }
68
+
69
+ void append_symbol(const char *symbol) {
70
+ auto s = TM::String(symbol);
71
+ append_symbol(s);
72
+ }
73
+
74
+ void append_symbol(TM::SharedPtr<TM::String> symbol_ptr) {
75
+ if (!symbol_ptr) {
76
+ auto s = TM::String("");
77
+ append_symbol(s);
78
+ }
79
+ append_symbol(*symbol_ptr);
80
+ }
81
+
82
+ bool assignment() { return m_assignment; }
83
+ void set_assignment(bool assignment) { m_assignment = assignment; }
84
+
85
+ void with_assignment(bool assignment, std::function<void()> fn) {
86
+ auto assignment_was = m_assignment;
87
+ m_assignment = assignment;
88
+ fn();
89
+ m_assignment = assignment_was;
90
+ }
91
+
92
+ TM::SharedPtr<const TM::String> file() const { return m_file; }
93
+ size_t line() const { return m_line; }
94
+ size_t column() const { return m_column; }
95
+
96
+ void set_line(size_t line) { m_line = line; }
97
+ void set_column(size_t column) { m_column = column; }
98
+
99
+ virtual void reset_sexp() { }
100
+
101
+ private:
102
+ bool m_assignment { false };
103
+ TM::SharedPtr<const TM::String> m_file {};
104
+ size_t m_line { 0 };
105
+ size_t m_column { 0 };
106
+ };
107
+
108
+ }
@@ -0,0 +1,64 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/lexer.hpp"
4
+ #include "natalie_parser/token.hpp"
5
+ #include "tm/shared_ptr.hpp"
6
+ #include "tm/vector.hpp"
7
+
8
+ namespace NatalieParser {
9
+
10
+ class InterpolatedStringLexer : public Lexer {
11
+ public:
12
+ InterpolatedStringLexer(Lexer &parent_lexer, char start_char, char stop_char, Token::Type end_type)
13
+ : Lexer { parent_lexer }
14
+ , m_end_type { end_type } {
15
+ set_nested_lexer(nullptr);
16
+ set_start_char(start_char == stop_char ? 0 : start_char);
17
+ set_stop_char(stop_char);
18
+ }
19
+
20
+ // used for lexing a Heredoc
21
+ InterpolatedStringLexer(Lexer &parent_lexer, Token string_token, Token::Type end_type)
22
+ : Lexer { string_token.literal_string(), parent_lexer.file() }
23
+ , m_end_type { end_type }
24
+ , m_alters_parent_cursor_position { false } {
25
+ set_cursor_line(parent_lexer.cursor_line() + 1); // the line after the heredoc delimiter
26
+ set_nested_lexer(nullptr);
27
+ set_stop_char(0);
28
+ }
29
+
30
+ virtual bool alters_parent_cursor_position() override { return m_alters_parent_cursor_position; }
31
+
32
+ private:
33
+ virtual Token build_next_token() override;
34
+ Token consume_string();
35
+ Token start_evaluation();
36
+ Token stop_evaluation();
37
+ Token finish();
38
+
39
+ virtual bool skip_whitespace() override { return false; }
40
+
41
+ /**
42
+ * a little state machine
43
+ * stateDiagram-v2
44
+ * [*] --> InProgress
45
+ * InProgress --> EvaluateBegin
46
+ * InProgress --> EndToken
47
+ * EvaluateBegin --> EvaluateEnd
48
+ * EvaluateEnd --> InProgress
49
+ * EndToken --> Done
50
+ * Done --> [*]
51
+ */
52
+ enum class State {
53
+ InProgress,
54
+ EvaluateBegin,
55
+ EvaluateEnd,
56
+ EndToken,
57
+ Done,
58
+ };
59
+
60
+ State m_state { State::InProgress };
61
+ Token::Type m_end_type;
62
+ bool m_alters_parent_cursor_position { true };
63
+ };
64
+ }
@@ -0,0 +1,37 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/lexer.hpp"
4
+ #include "natalie_parser/token.hpp"
5
+ #include "tm/shared_ptr.hpp"
6
+ #include "tm/vector.hpp"
7
+
8
+ namespace NatalieParser {
9
+
10
+ class RegexpLexer : public Lexer {
11
+ public:
12
+ RegexpLexer(Lexer &parent_lexer, char start_char, char stop_char)
13
+ : Lexer { parent_lexer } {
14
+ set_nested_lexer(nullptr);
15
+ set_start_char(start_char == stop_char ? 0 : start_char);
16
+ set_stop_char(stop_char);
17
+ }
18
+
19
+ private:
20
+ virtual Token build_next_token() override;
21
+ Token consume_regexp();
22
+ String *consume_options();
23
+
24
+ virtual bool skip_whitespace() override { return false; }
25
+
26
+ enum class State {
27
+ InProgress,
28
+ EvaluateBegin,
29
+ EvaluateEnd,
30
+ EndToken,
31
+ Done,
32
+ };
33
+
34
+ State m_state { State::InProgress };
35
+ SharedPtr<String> m_options {};
36
+ };
37
+ }
@@ -0,0 +1,57 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/lexer.hpp"
4
+ #include "natalie_parser/token.hpp"
5
+ #include "tm/shared_ptr.hpp"
6
+ #include "tm/vector.hpp"
7
+
8
+ namespace NatalieParser {
9
+
10
+ class WordArrayLexer : public Lexer {
11
+ public:
12
+ WordArrayLexer(Lexer &parent_lexer, char start_char, char stop_char, bool interpolated)
13
+ : Lexer { parent_lexer }
14
+ , m_interpolated { interpolated }
15
+ , m_start_char { start_char } {
16
+ set_nested_lexer(nullptr);
17
+ set_stop_char(stop_char);
18
+ }
19
+
20
+ private:
21
+ virtual Token build_next_token() override;
22
+ Token consume_array();
23
+
24
+ virtual bool skip_whitespace() override { return false; }
25
+
26
+ bool interpolated() const { return m_interpolated; }
27
+
28
+ // states
29
+ enum class State {
30
+ InProgress,
31
+ DynamicStringBegin,
32
+ DynamicStringInProgress,
33
+ DynamicStringEnd,
34
+ EvaluateBegin,
35
+ EvaluateEnd,
36
+ EndToken,
37
+ Done,
38
+ };
39
+
40
+ // transitions
41
+ Token in_progress_start_dynamic_string();
42
+ Token start_evaluation();
43
+ Token dynamic_string_finish();
44
+ Token in_progress_finish();
45
+
46
+ State m_state { State::InProgress };
47
+
48
+ // if this is true, then process #{...} interpolation
49
+ bool m_interpolated { false };
50
+
51
+ // if we encounter the m_start_char within the array,
52
+ // then increment m_pair_depth
53
+ char m_start_char { 0 };
54
+ int m_pair_depth { 0 };
55
+ SharedPtr<String> m_buffer;
56
+ };
57
+ }
@@ -0,0 +1,135 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/token.hpp"
4
+ #include "tm/shared_ptr.hpp"
5
+ #include "tm/vector.hpp"
6
+
7
+ namespace NatalieParser {
8
+
9
+ class Lexer {
10
+ public:
11
+ Lexer(SharedPtr<String> input, SharedPtr<String> file)
12
+ : m_input { input }
13
+ , m_file { file }
14
+ , m_size { input->length() } { }
15
+
16
+ Lexer(const Lexer &other, char start_char, char stop_char)
17
+ : m_input { other.m_input }
18
+ , m_file { other.m_file }
19
+ , m_size { other.m_size }
20
+ , m_index { other.m_index }
21
+ , m_cursor_line { other.m_cursor_line }
22
+ , m_cursor_column { other.m_cursor_column }
23
+ , m_token_line { other.m_token_line }
24
+ , m_token_column { other.m_token_column }
25
+ , m_stop_char { stop_char }
26
+ , m_start_char { start_char } { }
27
+
28
+ SharedPtr<Vector<Token>> tokens();
29
+ Token next_token();
30
+
31
+ virtual ~Lexer() {
32
+ delete m_nested_lexer;
33
+ }
34
+
35
+ SharedPtr<String> file() const { return m_file; }
36
+
37
+ size_t cursor_line() const { return m_cursor_line; }
38
+ void set_cursor_line(size_t cursor_line) { m_cursor_line = cursor_line; }
39
+
40
+ void set_nested_lexer(Lexer *lexer) { m_nested_lexer = lexer; }
41
+ void set_start_char(char c) { m_start_char = c; }
42
+ void set_stop_char(char c) { m_stop_char = c; }
43
+
44
+ virtual bool alters_parent_cursor_position() { return true; }
45
+
46
+ protected:
47
+ char current_char() {
48
+ if (m_index >= m_size)
49
+ return 0;
50
+ char c = m_input->at(m_index);
51
+ return c;
52
+ }
53
+
54
+ bool match(size_t bytes, const char *compare);
55
+ void advance();
56
+ void advance(size_t bytes);
57
+ void rewind(size_t bytes = 1);
58
+
59
+ char next() {
60
+ advance();
61
+ return current_char();
62
+ }
63
+
64
+ char peek() {
65
+ if (m_index + 1 >= m_size)
66
+ return 0;
67
+ return m_input->at(m_index + 1);
68
+ }
69
+
70
+ virtual bool skip_whitespace();
71
+ virtual Token build_next_token();
72
+ Token consume_symbol();
73
+ Token consume_word(Token::Type type);
74
+ Token consume_bare_name();
75
+ Token consume_constant();
76
+ Token consume_global_variable();
77
+ Token consume_heredoc();
78
+ Token consume_numeric();
79
+ Token consume_numeric_as_float(SharedPtr<String>);
80
+ Token consume_nth_ref();
81
+ long long consume_hex_number(int max_length = 0, bool allow_underscore = false);
82
+ long long consume_octal_number(int max_length = 0, bool allow_underscore = false);
83
+ Token consume_double_quoted_string(char, char, Token::Type begin_type = Token::Type::InterpolatedStringBegin, Token::Type end_type = Token::Type::InterpolatedStringEnd);
84
+ Token consume_single_quoted_string(char, char);
85
+ Token consume_quoted_array_without_interpolation(char start_char, char stop_char, Token::Type type);
86
+ Token consume_quoted_array_with_interpolation(char start_char, char stop_char, Token::Type type);
87
+ Token consume_regexp(char start_char, char stop_char);
88
+ SharedPtr<String> consume_non_whitespace();
89
+
90
+ void utf32_codepoint_to_utf8(String &buf, long long codepoint);
91
+ std::pair<bool, Token::Type> consume_escaped_byte(String &buf);
92
+
93
+ Token chars_to_fixnum_or_bignum_token(SharedPtr<String> chars, int base, int offset);
94
+
95
+ bool token_is_first_on_line() const;
96
+
97
+ bool char_can_be_string_or_regexp_delimiter(char c) const {
98
+ return (c >= '!' && c <= '/') || c == ':' || c == '?' || c == '@' || c == '~' || c == '|' || (c >= '^' && c <= '`');
99
+ }
100
+
101
+ SharedPtr<String> m_input;
102
+ SharedPtr<String> m_file;
103
+ size_t m_size { 0 };
104
+ size_t m_index { 0 };
105
+
106
+ // where we should jump after each heredoc
107
+ Vector<size_t> m_heredoc_stack {};
108
+
109
+ // current character position
110
+ size_t m_cursor_line { 0 };
111
+ size_t m_cursor_column { 0 };
112
+
113
+ // start of current token
114
+ size_t m_token_line { 0 };
115
+ size_t m_token_column { 0 };
116
+
117
+ // if the current token is preceded by whitespace
118
+ bool m_whitespace_precedes { false };
119
+
120
+ // the previously-matched token
121
+ Token m_last_token {};
122
+
123
+ // we have an open ternary '?' that needs a matching ':'
124
+ bool m_open_ternary { false };
125
+
126
+ Lexer *m_nested_lexer { nullptr };
127
+
128
+ char m_stop_char { 0 };
129
+
130
+ // if we encounter the m_start_char within the string,
131
+ // then increment m_pair_depth
132
+ char m_start_char { 0 };
133
+ int m_pair_depth { 0 };
134
+ };
135
+ }
@@ -0,0 +1,35 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/node/node.hpp"
4
+ #include "natalie_parser/node/node_with_args.hpp"
5
+ #include "natalie_parser/node/symbol_node.hpp"
6
+ #include "tm/hashmap.hpp"
7
+ #include "tm/owned_ptr.hpp"
8
+ #include "tm/string.hpp"
9
+
10
+ namespace NatalieParser {
11
+
12
+ using namespace TM;
13
+
14
+ class AliasNode : public Node {
15
+ public:
16
+ AliasNode(const Token &token, SharedPtr<SymbolNode> new_name, SharedPtr<SymbolNode> existing_name)
17
+ : Node { token }
18
+ , m_new_name { new_name }
19
+ , m_existing_name { existing_name } {
20
+ assert(m_new_name);
21
+ assert(m_existing_name);
22
+ }
23
+
24
+ virtual Type type() const override { return Type::Alias; }
25
+
26
+ const SharedPtr<SymbolNode> new_name() const { return m_new_name; }
27
+ const SharedPtr<SymbolNode> existing_name() const { return m_existing_name; }
28
+
29
+ virtual void transform(Creator *creator) const override;
30
+
31
+ private:
32
+ SharedPtr<SymbolNode> m_new_name {};
33
+ SharedPtr<SymbolNode> m_existing_name {};
34
+ };
35
+ }
@@ -0,0 +1,74 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/node/node.hpp"
4
+ #include "natalie_parser/node/node_with_args.hpp"
5
+ #include "tm/hashmap.hpp"
6
+ #include "tm/owned_ptr.hpp"
7
+ #include "tm/string.hpp"
8
+
9
+ namespace NatalieParser {
10
+
11
+ using namespace TM;
12
+
13
+ class ArgNode : public Node {
14
+ public:
15
+ ArgNode(const Token &token)
16
+ : Node { token } { }
17
+
18
+ ArgNode(const Token &token, SharedPtr<String> name)
19
+ : Node { token }
20
+ , m_name { name } { }
21
+
22
+ virtual Type type() const override { return Type::Arg; }
23
+
24
+ const SharedPtr<String> name() const { return m_name; }
25
+
26
+ void append_name(Creator *creator) const {
27
+ String n;
28
+ if (m_name)
29
+ n = m_name->clone();
30
+ if (m_splat) {
31
+ n.prepend_char('*');
32
+ } else if (m_kwsplat) {
33
+ n.prepend_char('*');
34
+ n.prepend_char('*');
35
+ } else if (m_block_arg) {
36
+ n.prepend_char('&');
37
+ }
38
+ creator->append_symbol(n);
39
+ }
40
+
41
+ bool splat_or_kwsplat() const { return m_splat || m_kwsplat; }
42
+
43
+ bool splat() const { return m_splat; }
44
+ void set_splat(bool splat) { m_splat = splat; }
45
+
46
+ bool kwsplat() const { return m_kwsplat; }
47
+ void set_kwsplat(bool kwsplat) { m_kwsplat = kwsplat; }
48
+
49
+ bool block_arg() const { return m_block_arg; }
50
+ void set_block_arg(bool block_arg) { m_block_arg = block_arg; }
51
+
52
+ const SharedPtr<Node> value() const { return m_value; }
53
+
54
+ void set_value(SharedPtr<Node> value) { m_value = value; }
55
+
56
+ void add_to_locals(TM::Hashmap<TM::String> &locals) {
57
+ locals.set(m_name->c_str());
58
+ }
59
+
60
+ virtual void transform(Creator *creator) const override {
61
+ creator->set_type("lasgn");
62
+ append_name(creator);
63
+ if (m_value)
64
+ creator->append(m_value.ref());
65
+ }
66
+
67
+ protected:
68
+ SharedPtr<String> m_name {};
69
+ bool m_block_arg { false };
70
+ bool m_splat { false };
71
+ bool m_kwsplat { false };
72
+ SharedPtr<Node> m_value {};
73
+ };
74
+ }
@@ -0,0 +1,34 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/node/node.hpp"
4
+ #include "natalie_parser/node/node_with_args.hpp"
5
+ #include "tm/hashmap.hpp"
6
+ #include "tm/string.hpp"
7
+
8
+ namespace NatalieParser {
9
+
10
+ using namespace TM;
11
+
12
+ class ArrayNode : public Node {
13
+ public:
14
+ ArrayNode(const Token &token)
15
+ : Node { token } { }
16
+
17
+ virtual Type type() const override { return Type::Array; }
18
+
19
+ void add_node(SharedPtr<Node> node) {
20
+ m_nodes.push(node);
21
+ }
22
+
23
+ const Vector<SharedPtr<Node>> &nodes() const { return m_nodes; }
24
+
25
+ virtual void transform(Creator *creator) const override {
26
+ creator->set_type("array");
27
+ for (auto node : m_nodes)
28
+ creator->append(node);
29
+ }
30
+
31
+ protected:
32
+ Vector<SharedPtr<Node>> m_nodes {};
33
+ };
34
+ }
@@ -0,0 +1,28 @@
1
+ #pragma once
2
+
3
+ #include "natalie_parser/node/array_node.hpp"
4
+ #include "natalie_parser/node/node.hpp"
5
+ #include "natalie_parser/node/node_with_args.hpp"
6
+ #include "tm/hashmap.hpp"
7
+ #include "tm/string.hpp"
8
+
9
+ namespace NatalieParser {
10
+
11
+ using namespace TM;
12
+
13
+ class ArrayPatternNode : public ArrayNode {
14
+ public:
15
+ ArrayPatternNode(const Token &token)
16
+ : ArrayNode { token } { }
17
+
18
+ virtual Type type() const override { return Type::ArrayPattern; }
19
+
20
+ virtual void transform(Creator *creator) const override {
21
+ creator->set_type("array_pat");
22
+ if (!m_nodes.is_empty())
23
+ creator->append_nil(); // NOTE: I don't know what this nil is for
24
+ for (auto node : m_nodes)
25
+ creator->append(node);
26
+ }
27
+ };
28
+ }