miniruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,43 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # Outcome of parsing a MiniRuby string/file.
  # Bundles the AST (Abstract Syntax Tree) that was built with the list of
  # error messages gathered along the way.
  class Parser::Result
    extend T::Sig

    # Root node of the parsed program.
    sig { returns(AST::ProgramNode) }
    attr_reader :ast

    # Error messages recorded during parsing.
    sig { returns(T::Array[String]) }
    attr_reader :errors

    sig { params(ast: AST::ProgramNode, errors: T::Array[String]).void }
    def initialize(ast, errors)
      @errors = errors
      @ast = ast
    end

    # Tells whether the error list contains anything.
    sig { returns(T::Boolean) }
    def err?
      @errors.any?
    end

    # Builds a multi-line dump: the class name, the error list (only when
    # there are errors) and finally the AST rendered with `inspect(2)`.
    sig { returns(String) }
    def inspect
      # String.new yields a mutable buffer despite frozen_string_literal.
      out = String.new
      out << "<#{self.class}>\n"

      if @errors.any?
        out << " !Errors!\n"
        @errors.each { |message| out << " - #{message}\n" }
        out << "\n"
      end

      out << " AST:\n" << @ast.inspect(2)
    end
  end
end
@@ -0,0 +1,198 @@
1
+ # typed: strict
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # MiniRuby parser.
  #
  # Pulls tokens from a Lexer while keeping a single token of lookahead
  # and builds an AST out of them. Syntax errors are recorded as messages
  # in a list that is handed back together with the AST.
  class Parser
    extend T::Sig

    require_relative 'parser/result'

    class << self
      extend T::Sig

      # Parse the given source and return the AST along with the
      # errors that were registered.
      sig { params(source: String).returns(Result) }
      def parse(source)
        new(source).parse
      end

      # Instances are meant to be created through `Parser.parse` only.
      private :new
    end

    sig { params(source: String).void }
    def initialize(source)
      # Lexer/Tokenizer that produces tokens
      @lexer = T.let(Lexer.new(source), Lexer)
      # Next token used for predicting productions
      @lookahead = T.let(Token.new(Token::NONE, Span::ZERO), Token)
      # Messages of the syntax errors registered so far
      @errors = T.let([], T::Array[String])
    end

    # Run the parser over the whole token stream.
    sig { returns(Result) }
    def parse
      advance # populate @lookahead
      Result.new(parse_program, @errors)
    end

    private

    # program = statements
    #
    # The program span stretches from the first statement to the last one,
    # or is `Span::ZERO` when there are no statements.
    sig { returns(AST::ProgramNode) }
    def parse_program
      statements = parse_statements

      span = Span::ZERO
      unless statements.empty?
        span = statements.fetch(0).span.join(statements.fetch(-1).span)
      end

      AST::ProgramNode.new(statements:, span:)
    end

    # statements = statement*
    #
    # Keeps parsing statements until the lookahead is END_OF_FILE or one of
    # the given stop tokens. The terminating token is not consumed.
    sig { params(stop_tokens: Symbol).returns(T::Array[AST::StatementNode]) }
    def parse_statements(*stop_tokens)
      statements = T.let([], T::Array[AST::StatementNode])
      swallow_statement_separators

      # Built once, the set of terminators does not change while looping.
      terminators = [Token::END_OF_FILE, *stop_tokens]
      statements << parse_statement until accept!(terminators)

      statements
    end

    # statement = expression_statement
    sig { returns(AST::StatementNode) }
    def parse_statement
      parse_expression_statement
    end

    # expression_statement = expression ("\n" | ";")
    #
    # END_OF_FILE is accepted as a separator as well. When no separator
    # follows the expression an error is registered and parsing continues.
    sig { returns(AST::StatementNode) }
    def parse_expression_statement
      expression = parse_expression
      span = expression.span
      if (separator = match(Token::NEWLINE, Token::SEMICOLON, Token::END_OF_FILE))
        span = span.join(separator.span)
      else
        error_expected('a statement separator')
      end

      swallow_statement_separators
      AST::ExpressionStatementNode.new(expression:, span:)
    end

    # Parses a single literal or identifier.
    #
    # Exactly one token is always consumed. A token that cannot start an
    # expression yields an `AST::InvalidNode` and, unless it is an ERROR
    # token (whose message is registered by `advance`), an
    # "unexpected token" error naming that token.
    sig { returns(AST::ExpressionNode) }
    def parse_expression
      token = advance
      span = token.span

      case token.type
      when Token::FALSE
        AST::FalseLiteralNode.new(span:)
      when Token::TRUE
        AST::TrueLiteralNode.new(span:)
      when Token::NIL
        AST::NilLiteralNode.new(span:)
      when Token::INTEGER
        AST::IntegerLiteralNode.new(span:, value: T.must(token.value))
      when Token::FLOAT
        AST::FloatLiteralNode.new(span:, value: T.must(token.value))
      when Token::STRING
        AST::StringLiteralNode.new(span:, value: T.must(token.value))
      when Token::IDENTIFIER
        AST::IdentifierNode.new(span:, value: T.must(token.value))
      else
        # Interpolate the consumed token itself so the message names it.
        add_error("unexpected token `#{token}`") if token.type != Token::ERROR
        AST::InvalidNode.new(span:, token:)
      end
    end

    # Move over to the next token.
    # Returns the token that was the lookahead before the call.
    sig { returns(Token) }
    def advance
      previous = @lookahead
      @lookahead = @lexer.next
      handle_error_token(@lookahead) if @lookahead.type == Token::ERROR

      previous
    end

    # Add the content of an error token to the syntax error list.
    # Tokens without a value are ignored.
    sig { params(err: Token).void }
    def handle_error_token(err)
      msg = err.value
      return unless msg

      add_error(msg)
    end

    # Register a syntax error
    sig { params(err: String).void }
    def add_error(err)
      @errors << err
    end

    # Checks whether the next token matches any of the specified types.
    sig { params(token_types: Symbol).returns(T::Boolean) }
    def accept(*token_types)
      accept!(token_types)
    end

    # Checks whether the next token matches any of the specified types.
    # Takes the types as an array instead of a splat.
    sig { params(token_types: T::Array[Symbol]).returns(T::Boolean) }
    def accept!(token_types)
      token_types.include?(@lookahead.type)
    end

    # Checks if the next token matches any of the given types,
    # if so it gets consumed and returned, otherwise `nil` is returned.
    sig { params(token_types: Symbol).returns(T.nilable(Token)) }
    def match(*token_types)
      advance if accept!(token_types)
    end

    # Accept and ignore any number of consecutive newline tokens.
    sig { void }
    def swallow_newlines
      loop { break unless match(Token::NEWLINE) }
    end

    # Accept and ignore any number of consecutive newline or semicolon tokens.
    sig { void }
    def swallow_statement_separators
      loop { break unless match(Token::NEWLINE, Token::SEMICOLON) }
    end

    # Adds an error which tells the user that another type of token
    # was expected. Nothing is added when the lookahead is an ERROR token.
    sig { params(expected: String).void }
    def error_expected(expected)
      return if @lookahead.type == Token::ERROR

      add_error("unexpected #{@lookahead.type_name}, expected #{expected}")
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # Location of one character inside a piece of text.
  class Position
    extend T::Sig

    # Index of the character.
    sig { returns(Integer) }
    attr_reader :char_index

    sig { params(char_index: Integer).void }
    def initialize(char_index)
      @char_index = char_index
    end

    # Position with index 0.
    ZERO = Position.new(0)

    # Two positions are equal when their character indices are equal;
    # any other kind of object is never equal to a position.
    sig { params(other: Object).returns(T::Boolean) }
    def ==(other)
      other.is_a?(Position) && @char_index == other.char_index
    end

    # Compact representation, e.g. `P(5)`.
    sig { returns(String) }
    def inspect
      format('P(%s)', char_index.inspect)
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # A pair of positions: where something starts and where it ends.
  class Span
    extend T::Sig

    # Position where the span begins.
    sig { returns(Position) }
    attr_reader :start

    # Position where the span finishes.
    sig { returns(Position) }
    attr_reader :end

    sig { params(start: Position, end_pos: Position).void }
    def initialize(start, end_pos)
      @end = end_pos
      @start = start
    end

    # Span whose start and end are both `Position::ZERO`.
    ZERO = Span.new(Position::ZERO, Position::ZERO)

    # Build a new span that runs from the start of this span
    # to the end of `other`.
    sig { params(other: Span).returns(Span) }
    def join(other)
      Span.new(@start, other.end)
    end

    # Spans are equal when both their start and end positions are equal;
    # any other kind of object is never equal to a span.
    sig { params(other: Object).returns(T::Boolean) }
    def ==(other)
      other.is_a?(Span) && @start == other.start && @end == other.end
    end

    # Compact representation, e.g. `S(P(0), P(3))`.
    sig { returns(String) }
    def inspect
      format('S(%s, %s)', @start.inspect, @end.inspect)
    end
  end
end
@@ -0,0 +1,300 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
+ require 'set'
5
+
6
module MiniRuby
  # Represents a single token (word) produced by the lexer.
  class Token
    extend T::Sig

    # String containing all valid decimal digits
    DIGITS = '0123456789'
    # String containing all valid hexadecimal digits
    HEX_DIGITS = '0123456789abcdefABCDEF'

    # Set of all keywords
    KEYWORDS = T.let(
      Set[
        'false',
        'true',
        'nil',
        'if',
        'while',
        'return',
        'end',
        'else',
        'self',
      ].freeze,
      T::Set[String],
    )

    # List of all token types
    # ------------------------

    # Represents no token, a placeholder
    NONE = :none
    # Signifies that the entire string/file has been processed,
    # there will be no more tokens
    END_OF_FILE = :end_of_file
    # Holds an error message, means that the string/file could not be
    # successfully processed
    ERROR = :error
    # Left parentheses `(`
    LPAREN = :lparen
    # Right parentheses `)`
    RPAREN = :rparen
    # Comma `,`
    COMMA = :comma
    # Semicolon `;`
    SEMICOLON = :semicolon
    # Newline
    NEWLINE = :newline
    # Equal `=`
    EQUAL = :equal
    # Bang `!`
    BANG = :bang
    # Double equal `==`
    EQUAL_EQUAL = :equal_equal
    # Not equal `!=`
    NOT_EQUAL = :not_equal
    # Greater than `>`
    GREATER = :greater
    # Greater equal `>=`
    GREATER_EQUAL = :greater_equal
    # Less than `<`
    LESS = :less
    # Less equal `<=`
    LESS_EQUAL = :less_equal
    # Plus `+`
    PLUS = :plus
    # Minus `-`
    MINUS = :minus
    # Star `*`
    STAR = :star
    # Slash `/`
    SLASH = :slash
    # Integer literal eg. `123`
    INTEGER = :integer
    # Float literal eg. `12.3`
    FLOAT = :float
    # String literal eg. `"foo"`
    STRING = :string
    # Identifier eg. `foo`
    IDENTIFIER = :identifier

    # Keyword `false`
    FALSE = :false
    # Keyword `true`
    TRUE = :true
    # Keyword `nil`
    NIL = :nil
    # Keyword `if`
    IF = :if
    # Keyword `while`
    WHILE = :while
    # Keyword `return`
    RETURN = :return
    # Keyword `end`
    END_K = :end
    # Keyword `else`
    ELSE = :else
    # Keyword `self`
    SELF = :self

    # Human-readable names of the non-keyword token types.
    # Single source of truth for `type_to_string` and `to_s`.
    TYPE_NAMES = T.let(
      {
        NONE => 'NONE',
        END_OF_FILE => 'END_OF_FILE',
        ERROR => 'ERROR',
        LPAREN => '(',
        RPAREN => ')',
        COMMA => ',',
        SEMICOLON => ';',
        NEWLINE => 'NEWLINE',
        EQUAL => '=',
        BANG => '!',
        EQUAL_EQUAL => '==',
        NOT_EQUAL => '!=',
        GREATER => '>',
        GREATER_EQUAL => '>=',
        LESS => '<',
        LESS_EQUAL => '<=',
        PLUS => '+',
        MINUS => '-',
        STAR => '*',
        SLASH => '/',
        FLOAT => 'FLOAT',
        INTEGER => 'INTEGER',
        STRING => 'STRING',
        IDENTIFIER => 'IDENTIFIER',
      }.freeze,
      T::Hash[Symbol, String],
    )
    private_constant :TYPE_NAMES

    class << self
      extend T::Sig

      # Converts a token type into a human-readable string.
      # Keyword types are rendered as the keyword itself, a type that is
      # neither listed nor a keyword is rendered as `<invalid>`.
      sig { params(type: Symbol).returns(String) }
      def type_to_string(type)
        TYPE_NAMES.fetch(type) do
          name = type.to_s
          KEYWORDS.include?(name) ? name : '<invalid>'
        end
      end
    end

    # Kind of the token, one of the token type constants.
    sig { returns(Symbol) }
    attr_reader :type

    # Optional text carried by the token.
    sig { returns(T.nilable(String)) }
    attr_reader :value

    # Span attached to the token.
    sig { returns(Span) }
    attr_reader :span

    sig { params(type: Symbol, span: Span, value: T.nilable(String)).void }
    def initialize(type, span, value = nil)
      @type = type
      @span = span
      @value = value
    end

    # Tokens are equal when their type and value are equal.
    # The span does not take part in the comparison.
    sig { params(other: Object).returns(T::Boolean) }
    def ==(other)
      return false unless other.is_a?(Token)

      type == other.type && value == other.value
    end

    # Debug representation; the value is left out when it is `nil`.
    sig { returns(String) }
    def inspect
      return "Token(#{type.inspect}, #{span.inspect})" if value.nil?

      "Token(#{type.inspect}, #{span.inspect}, #{value.inspect})"
    end

    # Whether the token is `==` or `!=`.
    sig { returns(T::Boolean) }
    def equality_operator?
      case @type
      when EQUAL_EQUAL, NOT_EQUAL then true
      else false
      end
    end

    # Whether the token is `+` or `-`.
    sig { returns(T::Boolean) }
    def additive_operator?
      case @type
      when PLUS, MINUS then true
      else false
      end
    end

    # Whether the token is `*` or `/`.
    sig { returns(T::Boolean) }
    def multiplicative_operator?
      case @type
      when STAR, SLASH then true
      else false
      end
    end

    # Whether the token is `>`, `>=`, `<` or `<=`.
    sig { returns(T::Boolean) }
    def comparison_operator?
      case @type
      when GREATER, GREATER_EQUAL, LESS, LESS_EQUAL then true
      else false
      end
    end

    # Human-readable name of this token's type.
    sig { returns(String) }
    def type_name
      self.class.type_to_string(@type)
    end

    # Converts a token into a human-readable string.
    #
    # Tokens that carry content render it: errors show their message,
    # numbers and identifiers show their value, strings show their
    # inspected value. Every other token renders as its type name.
    sig { returns(String) }
    def to_s
      case type
      when ERROR
        "<error: #{value}>"
      when FLOAT, INTEGER, IDENTIFIER
        value.to_s
      when STRING
        T.cast(value.inspect, String)
      else
        type_name
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniRuby
  # Version string of MiniRuby.
  VERSION = '0.1.0'
end
data/lib/miniruby.rb ADDED
@@ -0,0 +1,72 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ require 'sorbet-runtime'
5
+
6
# Contains the MiniRuby interpreter.
module MiniRuby
  # MiniRuby-specific error type, derived from StandardError.
  class Error < StandardError; end

  class << self
    extend T::Sig

    # Tokenize the MiniRuby source string.
    # Delegates the lexical analysis to `Lexer.lex` and returns
    # its array of tokens (words).
    sig { params(source: String).returns(T::Array[Token]) }
    def lex(source)
      Lexer.lex(source)
    end

    # Parse the MiniRuby source.
    # Delegates to `Parser.parse`, whose result holds an
    # AST (Abstract Syntax Tree) and a list of errors.
    sig { params(source: String).returns(Parser::Result) }
    def parse(source)
      Parser.parse(source)
    end

    # # Compile the MiniRuby source.
    # # Returns a chunk of compiled bytecode.
    # sig do
    #   params(
    #     source: String,
    #   ).returns(BytecodeFunction)
    # end
    # def compile(source)
    #   Compiler.compile_source(source)
    # end

    # # Interpret the MiniRuby source with the Virtual Machine.
    # # Returns the last computed value.
    # sig do
    #   params(
    #     source: String,
    #     stdout: IO,
    #     stdin: IO,
    #   ).returns(Object)
    # end
    # def interpret(source, stdout: $stdout, stdin: $stdin)
    #   VM.interpret(source, stdout:, stdin:)
    # end
  end
end
61
+
62
+ require_relative 'miniruby/version'
63
+ require_relative 'miniruby/position'
64
+ require_relative 'miniruby/span'
65
+ require_relative 'miniruby/token'
66
+ require_relative 'miniruby/lexer'
67
+ require_relative 'miniruby/ast'
68
+ require_relative 'miniruby/parser'
69
+ # require_relative 'miniruby/opcode'
70
+ # require_relative 'miniruby/bytecode_function'
71
+ # require_relative 'miniruby/compiler'
72
+ # require_relative 'miniruby/vm'