natalie_parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +22 -0
  3. data/Dockerfile +26 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +55 -0
  7. data/Rakefile +242 -0
  8. data/ext/natalie_parser/extconf.rb +9 -0
  9. data/ext/natalie_parser/mri_creator.hpp +139 -0
  10. data/ext/natalie_parser/natalie_parser.cpp +144 -0
  11. data/include/natalie_parser/creator/debug_creator.hpp +113 -0
  12. data/include/natalie_parser/creator.hpp +108 -0
  13. data/include/natalie_parser/lexer/interpolated_string_lexer.hpp +64 -0
  14. data/include/natalie_parser/lexer/regexp_lexer.hpp +37 -0
  15. data/include/natalie_parser/lexer/word_array_lexer.hpp +57 -0
  16. data/include/natalie_parser/lexer.hpp +135 -0
  17. data/include/natalie_parser/node/alias_node.hpp +35 -0
  18. data/include/natalie_parser/node/arg_node.hpp +74 -0
  19. data/include/natalie_parser/node/array_node.hpp +34 -0
  20. data/include/natalie_parser/node/array_pattern_node.hpp +28 -0
  21. data/include/natalie_parser/node/assignment_node.hpp +34 -0
  22. data/include/natalie_parser/node/back_ref_node.hpp +28 -0
  23. data/include/natalie_parser/node/begin_block_node.hpp +25 -0
  24. data/include/natalie_parser/node/begin_node.hpp +52 -0
  25. data/include/natalie_parser/node/begin_rescue_node.hpp +47 -0
  26. data/include/natalie_parser/node/bignum_node.hpp +37 -0
  27. data/include/natalie_parser/node/block_node.hpp +55 -0
  28. data/include/natalie_parser/node/block_pass_node.hpp +33 -0
  29. data/include/natalie_parser/node/break_node.hpp +32 -0
  30. data/include/natalie_parser/node/call_node.hpp +85 -0
  31. data/include/natalie_parser/node/case_in_node.hpp +40 -0
  32. data/include/natalie_parser/node/case_node.hpp +52 -0
  33. data/include/natalie_parser/node/case_when_node.hpp +43 -0
  34. data/include/natalie_parser/node/class_node.hpp +39 -0
  35. data/include/natalie_parser/node/colon2_node.hpp +44 -0
  36. data/include/natalie_parser/node/colon3_node.hpp +34 -0
  37. data/include/natalie_parser/node/constant_node.hpp +26 -0
  38. data/include/natalie_parser/node/def_node.hpp +55 -0
  39. data/include/natalie_parser/node/defined_node.hpp +33 -0
  40. data/include/natalie_parser/node/encoding_node.hpp +26 -0
  41. data/include/natalie_parser/node/end_block_node.hpp +25 -0
  42. data/include/natalie_parser/node/evaluate_to_string_node.hpp +37 -0
  43. data/include/natalie_parser/node/false_node.hpp +23 -0
  44. data/include/natalie_parser/node/fixnum_node.hpp +36 -0
  45. data/include/natalie_parser/node/float_node.hpp +36 -0
  46. data/include/natalie_parser/node/hash_node.hpp +34 -0
  47. data/include/natalie_parser/node/hash_pattern_node.hpp +27 -0
  48. data/include/natalie_parser/node/identifier_node.hpp +123 -0
  49. data/include/natalie_parser/node/if_node.hpp +43 -0
  50. data/include/natalie_parser/node/infix_op_node.hpp +46 -0
  51. data/include/natalie_parser/node/interpolated_node.hpp +33 -0
  52. data/include/natalie_parser/node/interpolated_regexp_node.hpp +28 -0
  53. data/include/natalie_parser/node/interpolated_shell_node.hpp +22 -0
  54. data/include/natalie_parser/node/interpolated_string_node.hpp +31 -0
  55. data/include/natalie_parser/node/interpolated_symbol_key_node.hpp +18 -0
  56. data/include/natalie_parser/node/interpolated_symbol_node.hpp +28 -0
  57. data/include/natalie_parser/node/iter_node.hpp +45 -0
  58. data/include/natalie_parser/node/keyword_arg_node.hpp +25 -0
  59. data/include/natalie_parser/node/keyword_splat_node.hpp +38 -0
  60. data/include/natalie_parser/node/logical_and_node.hpp +40 -0
  61. data/include/natalie_parser/node/logical_or_node.hpp +40 -0
  62. data/include/natalie_parser/node/match_node.hpp +38 -0
  63. data/include/natalie_parser/node/module_node.hpp +32 -0
  64. data/include/natalie_parser/node/multiple_assignment_arg_node.hpp +32 -0
  65. data/include/natalie_parser/node/multiple_assignment_node.hpp +37 -0
  66. data/include/natalie_parser/node/next_node.hpp +37 -0
  67. data/include/natalie_parser/node/nil_node.hpp +23 -0
  68. data/include/natalie_parser/node/nil_sexp_node.hpp +23 -0
  69. data/include/natalie_parser/node/node.hpp +155 -0
  70. data/include/natalie_parser/node/node_with_args.hpp +47 -0
  71. data/include/natalie_parser/node/not_match_node.hpp +35 -0
  72. data/include/natalie_parser/node/not_node.hpp +37 -0
  73. data/include/natalie_parser/node/nth_ref_node.hpp +27 -0
  74. data/include/natalie_parser/node/op_assign_accessor_node.hpp +74 -0
  75. data/include/natalie_parser/node/op_assign_and_node.hpp +34 -0
  76. data/include/natalie_parser/node/op_assign_node.hpp +47 -0
  77. data/include/natalie_parser/node/op_assign_or_node.hpp +34 -0
  78. data/include/natalie_parser/node/pin_node.hpp +33 -0
  79. data/include/natalie_parser/node/range_node.hpp +52 -0
  80. data/include/natalie_parser/node/redo_node.hpp +20 -0
  81. data/include/natalie_parser/node/regexp_node.hpp +36 -0
  82. data/include/natalie_parser/node/retry_node.hpp +20 -0
  83. data/include/natalie_parser/node/return_node.hpp +34 -0
  84. data/include/natalie_parser/node/safe_call_node.hpp +31 -0
  85. data/include/natalie_parser/node/sclass_node.hpp +37 -0
  86. data/include/natalie_parser/node/self_node.hpp +23 -0
  87. data/include/natalie_parser/node/shadow_arg_node.hpp +40 -0
  88. data/include/natalie_parser/node/shell_node.hpp +32 -0
  89. data/include/natalie_parser/node/splat_node.hpp +39 -0
  90. data/include/natalie_parser/node/splat_value_node.hpp +32 -0
  91. data/include/natalie_parser/node/stabby_proc_node.hpp +29 -0
  92. data/include/natalie_parser/node/string_node.hpp +42 -0
  93. data/include/natalie_parser/node/super_node.hpp +44 -0
  94. data/include/natalie_parser/node/symbol_key_node.hpp +19 -0
  95. data/include/natalie_parser/node/symbol_node.hpp +30 -0
  96. data/include/natalie_parser/node/to_array_node.hpp +33 -0
  97. data/include/natalie_parser/node/true_node.hpp +23 -0
  98. data/include/natalie_parser/node/unary_op_node.hpp +41 -0
  99. data/include/natalie_parser/node/undef_node.hpp +31 -0
  100. data/include/natalie_parser/node/until_node.hpp +21 -0
  101. data/include/natalie_parser/node/while_node.hpp +52 -0
  102. data/include/natalie_parser/node/yield_node.hpp +29 -0
  103. data/include/natalie_parser/node.hpp +89 -0
  104. data/include/natalie_parser/parser.hpp +218 -0
  105. data/include/natalie_parser/token.hpp +842 -0
  106. data/include/tm/defer.hpp +34 -0
  107. data/include/tm/hashmap.hpp +826 -0
  108. data/include/tm/macros.hpp +16 -0
  109. data/include/tm/optional.hpp +223 -0
  110. data/include/tm/owned_ptr.hpp +186 -0
  111. data/include/tm/recursion_guard.hpp +156 -0
  112. data/include/tm/shared_ptr.hpp +259 -0
  113. data/include/tm/string.hpp +1447 -0
  114. data/include/tm/tests.hpp +78 -0
  115. data/include/tm/vector.hpp +796 -0
  116. data/lib/natalie_parser/sexp.rb +36 -0
  117. data/lib/natalie_parser/version.rb +5 -0
  118. data/lib/natalie_parser.rb +3 -0
  119. data/natalie_parser.gemspec +23 -0
  120. data/src/lexer/interpolated_string_lexer.cpp +88 -0
  121. data/src/lexer/regexp_lexer.cpp +95 -0
  122. data/src/lexer/word_array_lexer.cpp +134 -0
  123. data/src/lexer.cpp +1703 -0
  124. data/src/node/alias_node.cpp +11 -0
  125. data/src/node/assignment_node.cpp +33 -0
  126. data/src/node/begin_node.cpp +29 -0
  127. data/src/node/begin_rescue_node.cpp +33 -0
  128. data/src/node/class_node.cpp +22 -0
  129. data/src/node/interpolated_regexp_node.cpp +19 -0
  130. data/src/node/interpolated_shell_node.cpp +25 -0
  131. data/src/node/interpolated_string_node.cpp +111 -0
  132. data/src/node/interpolated_symbol_node.cpp +25 -0
  133. data/src/node/match_node.cpp +14 -0
  134. data/src/node/module_node.cpp +21 -0
  135. data/src/node/multiple_assignment_node.cpp +37 -0
  136. data/src/node/node.cpp +10 -0
  137. data/src/node/node_with_args.cpp +35 -0
  138. data/src/node/op_assign_node.cpp +36 -0
  139. data/src/node/string_node.cpp +33 -0
  140. data/src/parser.cpp +2972 -0
  141. data/src/token.cpp +27 -0
  142. metadata +186 -0
@@ -0,0 +1,36 @@
1
+ class Sexp < Array # Array subclass used as an s-expression node; it also carries file/line/column/comments attributes.
2
+ def initialize(*items) # Appends each given item, in order, to the new (initially empty) array.
3
+ items.each { |i| self << i }
4
+ end
5
+ # Builds a Sexp whose elements are the elements of ary.
6
+ def self.from_array(ary)
7
+ Sexp.new(*ary)
8
+ end
9
+ # Plain read/write attributes; within this class only #new assigns any of them (file, line, column).
10
+ attr_accessor :file, :line, :column, :comments
11
+ # Renders as s(a, b, ...), calling inspect on every element.
12
+ def inspect
13
+ "s(#{map(&:inspect).join(', ')})"
14
+ end
15
+ # sexp_type is another name for first, i.e. it returns the first element.
16
+ alias sexp_type first
17
+ # Instance-level factory: builds a Sexp from items and copies this node's file, line and column onto it (comments is not copied).
18
+ def new(*items)
19
+ s = Sexp.new(*items)
20
+ s.file = file
21
+ s.line = line
22
+ s.column = column
23
+ s
24
+ end
25
+ # Pretty-printer hook; q only needs group/seplist/pp (presumably a PP instance -- confirm). Elements are wrapped in "s(" ... ")".
26
+ def pretty_print q
27
+ nnd = ")"
28
+ q.group(1, "s(", nnd) do
29
+ q.seplist(self) { |v| q.pp v }
30
+ end
31
+ end
32
+ end
33
+ # Shorthand constructor defined outside the class: s(:a, 1) returns Sexp.new(:a, 1).
34
+ def s(*items)
35
+ Sexp.new(*items)
36
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ # Holds the gem's version string.
3
+ class NatalieParser
4
+ VERSION = '1.0.0' # read as NatalieParser::VERSION by the gemspec
5
+ end
@@ -0,0 +1,3 @@
1
+ require_relative './natalie_parser/sexp'
2
+ require_relative './natalie_parser/version'
3
+ require 'natalie_parser/natalie_parser'
@@ -0,0 +1,23 @@
1
+ lib = File.expand_path('lib', __dir__) # absolute path of the lib directory next to this file
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # put lib first on the load path, without adding it twice
3
+ require 'natalie_parser/version'
4
+ # Gem metadata; the version is taken from NatalieParser::VERSION, loaded by the require above.
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'natalie_parser'
7
+ spec.version = NatalieParser::VERSION
8
+ spec.authors = ['Tim Morgan']
9
+ spec.email = ['tim@timmorgan.org']
10
+
11
+ spec.summary = 'A Parser for the Ruby Programming Language'
12
+ spec.description = 'NatalieParser is a zero-dependency, from-scratch, hand-written recursive descent parser for the Ruby Programming Language.'
13
+ spec.homepage = 'https://github.com/natalie-lang/natalie_parser'
14
+ spec.license = 'MIT'
15
+ # Package every path printed by `git ls-files`, run from this file's directory, except paths that start with "test" or with a dot.
16
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
17
+ `git ls-files`.split("\n").reject { |f| f.match(%r{^(test|\.)}) }
18
+ end
19
+ # Both lib and ext are require paths; the extension is declared through its extconf.rb.
20
+ spec.require_paths = ['lib', 'ext']
21
+ spec.extensions = %w[ext/natalie_parser/extconf.rb]
22
+ end
23
+
@@ -0,0 +1,88 @@
1
+ #include "natalie_parser/lexer/interpolated_string_lexer.hpp"
2
+ #include "natalie_parser/token.hpp"
3
+
4
+ namespace NatalieParser {
5
+ // Produces the next token by dispatching on m_state; each state maps to exactly one helper call or token.
6
+ Token InterpolatedStringLexer::build_next_token() {
7
+ switch (m_state) {
8
+ case State::InProgress:
9
+ return consume_string();
10
+ case State::EvaluateBegin:
11
+ return start_evaluation();
12
+ case State::EvaluateEnd: // presumably reached once the nested lexer created by start_evaluation() is finished -- confirm in Lexer
13
+ return stop_evaluation();
14
+ case State::EndToken:
15
+ return finish();
16
+ case State::Done:
17
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column }; // in the Done state every call yields Eof
18
+ }
19
+ TM_UNREACHABLE(); // each case above returns
20
+ }
21
+ // Scans literal text into a fresh buffer until "#{", the stop char at pair depth 0, or end of input, and returns the matching token.
22
+ Token InterpolatedStringLexer::consume_string() {
23
+ SharedPtr<String> buf = new String;
24
+ while (auto c = current_char()) {
25
+ if (c == '\\') {
26
+ advance(); // backslash
27
+ auto result = consume_escaped_byte(*buf); // result.first is tested as a success flag; on failure result.second is used as the token type
28
+ if (!result.first)
29
+ return Token { result.second, current_char(), m_file, m_cursor_line, m_cursor_column };
30
+ } else if (c == '#' && peek() == '{') {
31
+ if (buf->is_empty()) { // nothing buffered: skip "#{" and emit the evaluation-begin token right away
32
+ advance(2);
33
+ return start_evaluation();
34
+ }
35
+ auto token = Token { Token::Type::String, buf, m_file, m_token_line, m_token_column }; // emit the buffered text first
36
+ advance(2);
37
+ m_state = State::EvaluateBegin; // the next build_next_token() call starts the evaluation
38
+ return token;
39
+ } else if (c == m_start_char && m_start_char != m_stop_char) { // nested opener; only counted when opener and closer differ
40
+ m_pair_depth++;
41
+ advance();
42
+ buf->append_char(c);
43
+ } else if (c == m_stop_char) {
44
+ advance();
45
+ if (m_pair_depth > 0) { // closes a nested pair: keep the char as literal text
46
+ m_pair_depth--;
47
+ buf->append_char(c);
48
+ } else if (buf->is_empty()) { // terminator with nothing buffered: emit the end token now
49
+ return finish();
50
+ } else { // terminator with buffered text: emit the text, the end token follows on the next call
51
+ m_state = State::EndToken;
52
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
53
+ }
54
+ } else {
55
+ buf->append_char(c);
56
+ advance();
57
+ }
58
+ }
59
+
60
+ // Heredocs don't use a stop char --
61
+ // they just give us the whole input and we consume everything.
62
+ if (m_stop_char == 0) {
63
+ advance();
64
+ m_state = State::EndToken;
65
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
66
+ }
67
+
68
+ return Token { Token::Type::UnterminatedString, buf, m_file, m_token_line, m_token_column }; // input ended before the stop char was seen
69
+ }
70
+ // Begins an interpolation: constructs a nested Lexer from *this with '{' and '}', moves to EvaluateEnd and emits EvaluateToStringBegin.
71
+ Token InterpolatedStringLexer::start_evaluation() {
72
+ m_nested_lexer = new Lexer { *this, '{', '}' };
73
+ m_state = State::EvaluateEnd;
74
+ return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column };
75
+ }
76
+ // Ends an interpolation: steps over the closing brace, returns to literal scanning (InProgress) and emits EvaluateToStringEnd.
77
+ Token InterpolatedStringLexer::stop_evaluation() {
78
+ advance(); // }
79
+ m_state = State::InProgress;
80
+ return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column };
81
+ }
82
+ // Marks the lexer Done and emits a token of type m_end_type at the current cursor position.
83
+ Token InterpolatedStringLexer::finish() {
84
+ m_state = State::Done;
85
+ return Token { m_end_type, m_file, m_cursor_line, m_cursor_column };
86
+ }
87
+
88
+ };
@@ -0,0 +1,95 @@
1
+ #include "natalie_parser/lexer/regexp_lexer.hpp"
2
+ #include "natalie_parser/token.hpp"
3
+
4
+ namespace NatalieParser {
5
+ // Produces the next token by dispatching on m_state; the interpolation states are handled inline here.
6
+ Token RegexpLexer::build_next_token() {
7
+ switch (m_state) {
8
+ case State::InProgress:
9
+ return consume_regexp();
10
+ case State::EvaluateBegin: // set by consume_regexp() after it has stepped over "#{"
11
+ m_nested_lexer = new Lexer { *this };
12
+ m_nested_lexer->set_stop_char('}');
13
+ m_state = State::EvaluateEnd;
14
+ return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column };
15
+ case State::EvaluateEnd:
16
+ advance(); // }
17
+ if (current_char() == m_stop_char) { // the regexp ends right after the interpolation: consume the terminator and the option letters now
18
+ advance();
19
+ m_options = consume_options();
20
+ m_state = State::EndToken;
21
+ } else {
22
+ m_state = State::InProgress;
23
+ }
24
+ return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column };
25
+ case State::EndToken: {
26
+ m_state = State::Done;
27
+ auto token = Token { Token::Type::InterpolatedRegexpEnd, m_file, m_cursor_line, m_cursor_column };
28
+ if (m_options && !m_options->is_empty()) // the literal is attached only when at least one option letter was read
29
+ token.set_literal(m_options);
30
+ return token;
31
+ }
32
+ case State::Done:
33
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column }; // in the Done state every call yields Eof
34
+ }
35
+ TM_UNREACHABLE(); // each case above returns
36
+ }
+ // Scans regexp source text into a fresh buffer until "#{", the stop char at pair depth 0, or end of input, and returns the matching token.
38
+ Token RegexpLexer::consume_regexp() {
39
+ SharedPtr<String> buf = new String;
40
+ while (auto c = current_char()) {
41
+ if (c == '\\') {
42
+ c = next(); // c is now the character following the backslash
43
+ switch (c) {
44
+ case '/': // an escaped slash is stored without its backslash
45
+ buf->append_char(c);
46
+ break;
47
+ default:
48
+ if (c == m_stop_char) { // an escaped terminator is stored without its backslash as well
49
+ buf->append_char(c);
50
+ } else { // every other escape is kept verbatim, backslash included
51
+ buf->append_char('\\');
52
+ buf->append_char(c);
53
+ }
54
+ break;
55
+ }
56
+ advance();
57
+ } else if (c == '#' && peek() == '{') {
58
+ auto token = Token { Token::Type::String, buf, m_file, m_token_line, m_token_column }; // emit the text collected so far (possibly empty)
59
+ // No replacement buffer is allocated here: buf is a local and this branch returns without using it again.
60
+ advance(2);
61
+ m_state = State::EvaluateBegin; // the next build_next_token() call starts the evaluation
62
+ return token;
63
+ } else if (c == m_start_char && m_start_char != m_stop_char) { // nested opener; only counted when opener and closer differ
64
+ m_pair_depth++;
65
+ advance();
66
+ buf->append_char(c);
67
+ } else if (c == m_stop_char) {
68
+ advance();
69
+ if (m_pair_depth > 0) { // closes a nested pair: keep the char as literal text
70
+ m_pair_depth--;
71
+ buf->append_char(c);
72
+ } else { // real terminator: read the option letters, emit the text, the end token follows on the next call
73
+ m_options = consume_options();
74
+ m_state = State::EndToken;
75
+ return Token { Token::Type::String, buf, m_file, m_token_line, m_token_column };
76
+ }
77
+ } else {
78
+ buf->append_char(c);
79
+ advance();
80
+ }
81
+ }
82
+ return Token { Token::Type::UnterminatedRegexp, buf, m_file, m_token_line, m_token_column }; // input ended before the stop char was seen
83
+ }
84
+ // Collects consecutive option letters (i m x o u e s n) starting at the cursor into a newly allocated String; the result may be empty.
85
+ String *RegexpLexer::consume_options() {
86
+ char c = current_char();
87
+ auto options = new String; // returned as a raw pointer; the callers in this file assign it to m_options
88
+ while (c == 'i' || c == 'm' || c == 'x' || c == 'o' || c == 'u' || c == 'e' || c == 's' || c == 'n') {
89
+ options->append_char(c);
90
+ c = next();
91
+ }
92
+ return options;
93
+ }
94
+
95
+ };
@@ -0,0 +1,134 @@
1
+ #include "natalie_parser/lexer/word_array_lexer.hpp"
2
+ #include "natalie_parser/token.hpp"
3
+
4
+ namespace NatalieParser {
5
+ // Produces the next token by dispatching on m_state; both "in progress" states share consume_array().
6
+ Token WordArrayLexer::build_next_token() {
7
+ switch (m_state) {
8
+ case State::InProgress:
9
+ return consume_array();
10
+ case State::DynamicStringInProgress: // same scanner; consume_array() checks m_state to decide how a word ends
11
+ return consume_array();
12
+ case State::DynamicStringBegin: // set after "#{" was seen: emit the text buffered before it, then start the evaluation
13
+ m_state = State::EvaluateBegin;
14
+ return Token { Token::Type::String, m_buffer, m_file, m_token_line, m_token_column };
15
+ case State::DynamicStringEnd:
16
+ if (current_char() == m_stop_char) { // the array terminator directly follows this word: consume it here
17
+ advance();
18
+ m_state = State::EndToken;
19
+ } else {
20
+ m_state = State::InProgress;
21
+ }
22
+ return Token { Token::Type::InterpolatedStringEnd, m_file, m_token_line, m_token_column };
23
+ case State::EvaluateBegin:
24
+ return start_evaluation();
25
+ case State::EvaluateEnd:
26
+ advance(); // }
27
+ m_state = State::DynamicStringInProgress; // keep scanning the remainder of the same word
28
+ return Token { Token::Type::EvaluateToStringEnd, m_file, m_token_line, m_token_column };
29
+ case State::EndToken:
30
+ m_state = State::Done;
31
+ return Token { Token::Type::RBracket, m_file, m_cursor_line, m_cursor_column };
32
+ case State::Done:
33
+ return Token { Token::Type::Eof, m_file, m_cursor_line, m_cursor_column }; // in the Done state every call yields Eof
34
+ }
35
+ TM_UNREACHABLE(); // each case above returns
36
+ }
37
+ // Scans one word of the word-array literal into a fresh m_buffer and returns the token for it, for "#{", for the terminator, or for end of input.
38
+ Token WordArrayLexer::consume_array() {
39
+ m_buffer = new String; // every call starts with an empty buffer
40
+ while (auto c = current_char()) {
41
+ if (c == '\\') {
42
+ c = next(); // c is now the character following the backslash
43
+ advance();
44
+ if (c == ' ') { // an escaped space becomes part of the word
45
+ m_buffer->append_char(c);
46
+ } else if (m_interpolated) {
47
+ // FIXME: need to use logic from InterpolatedStringLexer
48
+ switch (c) {
49
+ case 'n':
50
+ m_buffer->append_char('\n');
51
+ break;
52
+ case 't':
53
+ m_buffer->append_char('\t');
54
+ break;
55
+ default:
56
+ m_buffer->append_char(c);
57
+ break;
58
+ }
59
+ } else {
60
+ if (isspace(static_cast<unsigned char>(c))) { // cast first: isspace is undefined for negative values other than EOF
61
+ m_buffer->append_char(c);
62
+ } else { // non-interpolated form keeps other escapes verbatim, backslash included
63
+ m_buffer->append_char('\\');
64
+ m_buffer->append_char(c);
65
+ }
66
+ }
67
+ } else if (isspace(static_cast<unsigned char>(c))) { // unescaped whitespace separates words; same cast as above
68
+ if (m_state == State::DynamicStringInProgress) { // whitespace ends the word that contained an interpolation
69
+ advance();
70
+ return dynamic_string_finish();
71
+ }
72
+ if (!m_buffer->is_empty()) { // whitespace ends a plain word: emit it
73
+ auto token = Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column };
74
+ advance();
75
+ return token;
76
+ }
77
+ advance(); // space
78
+ } else if (m_interpolated && c == '#' && peek() == '{') {
79
+ return in_progress_start_dynamic_string();
80
+ } else if (c == m_start_char && m_start_char != m_stop_char) { // nested opener; only counted when opener and closer differ
81
+ m_pair_depth++;
82
+ advance();
83
+ m_buffer->append_char(c);
84
+ } else if (c == m_stop_char) {
85
+ if (m_pair_depth > 0) { // closes a nested pair: keep the char as literal text
86
+ m_pair_depth--;
87
+ m_buffer->append_char(c);
88
+ advance();
89
+ } else if (m_state == State::DynamicStringInProgress) { // terminator is not consumed here; the DynamicStringEnd state or a later pass handles it
90
+ return dynamic_string_finish();
91
+ } else {
92
+ return in_progress_finish();
93
+ }
94
+ } else {
95
+ m_buffer->append_char(c);
96
+ advance();
97
+ }
98
+ }
99
+
100
+ return Token { Token::Type::UnterminatedWordArray, m_buffer, m_file, m_token_line, m_token_column }; // input ended before the stop char was seen
101
+ }
102
+ // Called when "#{" is found: steps over it, moves to DynamicStringBegin and emits InterpolatedStringBegin.
103
+ Token WordArrayLexer::in_progress_start_dynamic_string() {
104
+ advance(2); // #{
105
+ m_state = State::DynamicStringBegin; // the text already in m_buffer is emitted by build_next_token() on the next call
106
+ return Token { Token::Type::InterpolatedStringBegin, m_file, m_cursor_line, m_cursor_column };
107
+ }
108
+ // Begins an interpolation: constructs a nested Lexer from *this with '{' and '}', moves to EvaluateEnd and emits EvaluateToStringBegin.
109
+ Token WordArrayLexer::start_evaluation() {
110
+ m_nested_lexer = new Lexer { *this, '{', '}' };
111
+ m_state = State::EvaluateEnd;
112
+ return Token { Token::Type::EvaluateToStringBegin, m_file, m_token_line, m_token_column };
113
+ }
114
+ // Ends a word that contained an interpolation: emits trailing text first if there is any, otherwise InterpolatedStringEnd directly.
115
+ Token WordArrayLexer::dynamic_string_finish() {
116
+ if (!m_buffer->is_empty()) {
117
+ m_state = State::DynamicStringEnd; // InterpolatedStringEnd is then emitted by build_next_token() on the next call
118
+ return Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column };
119
+ }
120
+ m_state = State::InProgress;
121
+ return Token { Token::Type::InterpolatedStringEnd, m_file, m_token_line, m_token_column };
122
+ }
123
+ // Handles the array terminator outside an interpolated word: consumes it, emits pending text if any, otherwise RBracket directly.
124
+ Token WordArrayLexer::in_progress_finish() {
125
+ advance(); // ) or ] or } or whatever
126
+ if (!m_buffer->is_empty()) {
127
+ m_state = State::EndToken; // RBracket is then emitted by build_next_token() on the next call
128
+ return Token { Token::Type::String, m_buffer, m_file, m_cursor_line, m_cursor_column };
129
+ }
130
+ m_state = State::Done;
131
+ return Token { Token::Type::RBracket, m_file, m_cursor_line, m_cursor_column };
132
+ }
133
+
134
+ };