miniruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,43 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # Outcome of parsing a MiniRuby string/file.
  # Bundles the AST (Abstract Syntax Tree) that was built with the list of
  # error messages that were recorded along the way.
  class Parser::Result
    extend T::Sig

    # Root node of the parsed program.
    sig { returns(AST::ProgramNode) }
    attr_reader :ast

    # Error messages recorded for this parse.
    sig { returns(T::Array[String]) }
    attr_reader :errors

    sig { params(ast: AST::ProgramNode, errors: T::Array[String]).void }
    def initialize(ast, errors)
      @ast = ast
      @errors = errors
    end

    # Whether any error has been recorded.
    sig { returns(T::Boolean) }
    def err?
      @errors.any?
    end

    # Multi-line, human-readable dump: the class name, the errors (only when
    # there are some) and finally the AST rendered by `ast.inspect(2)`.
    sig { returns(String) }
    def inspect
      out = String.new
      out << "<#{self.class}>\n"

      if err?
        out << " !Errors!\n"
        @errors.each { |message| out << " - #{message}\n" }
        out << "\n"
      end

      out << " AST:\n" << @ast.inspect(2)
    end
  end
end
@@ -0,0 +1,198 @@
1
+ # typed: strict
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # MiniRuby parser.
  #
  # Reads tokens from a Lexer while keeping a single token of lookahead and
  # builds the AST of a program. Syntax errors are accumulated as strings and
  # handed back together with the AST in a Parser::Result.
  class Parser
    extend T::Sig

    require_relative 'parser/result'

    class << self
      extend T::Sig

      # Parse a MiniRuby source string and return the AST plus any errors.
      sig { params(source: String).returns(Result) }
      def parse(source)
        new(source).parse
      end

      # Parsers are only instantiated through `Parser.parse`.
      private :new
    end

    sig { params(source: String).void }
    def initialize(source)
      # Lexer/Tokenizer that produces tokens
      @lexer = T.let(Lexer.new(source), Lexer)
      # Next token used for predicting productions
      @lookahead = T.let(Token.new(Token::NONE, Span::ZERO), Token)
      # Messages of all syntax errors registered so far
      @errors = T.let([], T::Array[String])
    end

    # Parse the whole source handed to the constructor.
    sig { returns(Result) }
    def parse
      advance # populate @lookahead
      Result.new(parse_program, @errors)
    end

    private

    # program = statements
    sig { returns(AST::ProgramNode) }
    def parse_program
      statements = parse_statements

      # The program spans from its first statement to its last one;
      # an empty program gets the zero span.
      span =
        if statements.empty?
          Span::ZERO
        else
          statements.fetch(0).span.join(statements.fetch(-1).span)
        end

      AST::ProgramNode.new(statements:, span:)
    end

    # statements = statement*
    #
    # Parses statements until the lookahead is END_OF_FILE or one of the
    # given stop tokens. The terminating token is not consumed.
    sig { params(stop_tokens: Symbol).returns(T::Array[AST::StatementNode]) }
    def parse_statements(*stop_tokens)
      statements = T.let([], T::Array[AST::StatementNode])
      swallow_statement_separators

      terminators = [Token::END_OF_FILE, *stop_tokens]
      until accept!(terminators)
        statements << parse_statement
      end

      statements
    end

    # statement = expression_statement
    sig { returns(AST::StatementNode) }
    def parse_statement
      parse_expression_statement
    end

    # expression_statement = expression ("\n" | ";")
    #
    # END_OF_FILE is accepted as a separator as well. When no separator
    # follows the expression an error is registered and parsing continues.
    sig { returns(AST::StatementNode) }
    def parse_expression_statement
      expression = parse_expression
      span = expression.span

      separator = match(Token::NEWLINE, Token::SEMICOLON, Token::END_OF_FILE)
      if separator
        span = span.join(separator.span)
      else
        error_expected('a statement separator')
      end

      swallow_statement_separators
      AST::ExpressionStatementNode.new(expression:, span:)
    end

    # Parses a single literal or identifier.
    #
    # Exactly one token is always consumed. A token that cannot start an
    # expression yields an InvalidNode and, unless it is an error token
    # (whose message is registered by `advance`), an "unexpected token" error.
    sig { returns(AST::ExpressionNode) }
    def parse_expression
      token = advance
      span = token.span

      case token.type
      when Token::FALSE
        AST::FalseLiteralNode.new(span:)
      when Token::TRUE
        AST::TrueLiteralNode.new(span:)
      when Token::NIL
        AST::NilLiteralNode.new(span:)
      when Token::INTEGER
        AST::IntegerLiteralNode.new(span:, value: T.must(token.value))
      when Token::FLOAT
        AST::FloatLiteralNode.new(span:, value: T.must(token.value))
      when Token::STRING
        AST::StringLiteralNode.new(span:, value: T.must(token.value))
      when Token::IDENTIFIER
        AST::IdentifierNode.new(span:, value: T.must(token.value))
      else
        # Interpolate the consumed token itself so the message names it.
        add_error("unexpected token `#{token}`") if token.type != Token::ERROR
        AST::InvalidNode.new(span:, token:)
      end
    end

    # Move over to the next token.
    # Returns the token that was the lookahead before the call.
    sig { returns(Token) }
    def advance
      consumed = @lookahead
      @lookahead = @lexer.next
      handle_error_token(@lookahead) if @lookahead.type == Token::ERROR

      consumed
    end

    # Add the content of an error token to the syntax error list.
    # Error tokens without a value are ignored.
    sig { params(err: Token).void }
    def handle_error_token(err)
      msg = err.value
      add_error(msg) if msg
    end

    # Register a syntax error
    sig { params(err: String).void }
    def add_error(err)
      @errors << err
    end

    # Checks whether the next token matches any of the specified types.
    sig { params(token_types: Symbol).returns(T::Boolean) }
    def accept(*token_types)
      accept!(token_types)
    end

    # Checks whether the next token matches any of the specified types.
    # Same as `accept` but takes the types as an array.
    sig { params(token_types: T::Array[Symbol]).returns(T::Boolean) }
    def accept!(token_types)
      token_types.include?(@lookahead.type)
    end

    # Checks if the next token matches any of the given types,
    # if so it gets consumed and returned, otherwise `nil` is returned.
    sig { params(token_types: Symbol).returns(T.nilable(Token)) }
    def match(*token_types)
      accept!(token_types) ? advance : nil
    end

    # Accept and ignore any number of consecutive newline tokens.
    sig { void }
    def swallow_newlines
      advance while accept(Token::NEWLINE)
    end

    # Accept and ignore any number of consecutive newline or semicolon tokens.
    sig { void }
    def swallow_statement_separators
      advance while accept(Token::NEWLINE, Token::SEMICOLON)
    end

    # Adds an error which tells the user that another type of token
    # was expected. Nothing is added when the lookahead is an error token.
    sig { params(expected: String).void }
    def error_expected(expected)
      return if @lookahead.type == Token::ERROR

      add_error("unexpected #{@lookahead.type_name}, expected #{expected}")
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # A position of a single character in a piece of text,
  # stored as a character index.
  class Position
    extend T::Sig

    # Index of the character this position refers to.
    sig { returns(Integer) }
    attr_reader :char_index

    sig { params(char_index: Integer).void }
    def initialize(char_index)
      @char_index = char_index
    end

    # Position with character index 0.
    ZERO = Position.new(0)

    # Two positions are equal when their character indices are equal.
    # Anything that is not a Position is never equal.
    sig { params(other: Object).returns(T::Boolean) }
    def ==(other)
      other.is_a?(Position) && char_index == other.char_index
    end

    # Compact representation, eg. `P(5)`.
    sig { returns(String) }
    def inspect
      "P(#{@char_index.inspect})"
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
module MiniRuby
  # A collection of two positions: start and end
  class Span
    extend T::Sig

    # Position where the span begins.
    sig { returns(Position) }
    attr_reader :start

    # Position where the span finishes.
    sig { returns(Position) }
    attr_reader :end

    sig { params(start: Position, end_pos: Position).void }
    def initialize(start, end_pos)
      @start = start
      @end = end_pos
    end

    # Span whose start and end are both `Position::ZERO`.
    ZERO = Span.new(Position::ZERO, Position::ZERO)

    # Create a new span that includes the area of two spans.
    # The result runs from the start of this span to the end of `other`.
    sig { params(other: Span).returns(Span) }
    def join(other)
      Span.new(start, other.end)
    end

    # Two spans are equal when both their start and end positions are equal.
    # Anything that is not a Span is never equal.
    sig { params(other: Object).returns(T::Boolean) }
    def ==(other)
      other.is_a?(Span) && start == other.start && self.end == other.end
    end

    # Compact representation, eg. `S(P(0), P(3))`.
    sig { returns(String) }
    def inspect
      "S(#{start.inspect}, #{self.end.inspect})"
    end
  end
end
@@ -0,0 +1,300 @@
1
+ # typed: strong
2
+ # frozen_string_literal: true
3
+
4
+ require 'set'
5
+
6
module MiniRuby
  # Represents a single token (word) produced by the lexer.
  class Token
    extend T::Sig

    # List of all token types
    # ------------------------

    # Represents no token, a placeholder
    NONE = :none
    # Signifies that the entire string/file has been processed,
    # there will be no more tokens
    END_OF_FILE = :end_of_file
    # Holds an error message, means that the string/file could not be
    # successfully processed
    ERROR = :error
    # Left parentheses `(`
    LPAREN = :lparen
    # Right parentheses `)`
    RPAREN = :rparen
    # Comma `,`
    COMMA = :comma
    # Semicolon `;`
    SEMICOLON = :semicolon
    # Newline
    NEWLINE = :newline
    # Equal `=`
    EQUAL = :equal
    # Bang `!`
    BANG = :bang
    # Equal `==`
    EQUAL_EQUAL = :equal_equal
    # Not equal `!=`
    NOT_EQUAL = :not_equal
    # Greater than `>`
    GREATER = :greater
    # Greater equal `>=`
    GREATER_EQUAL = :greater_equal
    # Less than `<`
    LESS = :less
    # Less equal `<=`
    LESS_EQUAL = :less_equal
    # Plus `+`
    PLUS = :plus
    # Minus `-`
    MINUS = :minus
    # Star `*`
    STAR = :star
    # Slash `/`
    SLASH = :slash
    # Integer literal eg. `123`
    INTEGER = :integer
    # Float literal eg. `12.3`
    FLOAT = :float
    # String literal eg. `"foo"`
    STRING = :string
    # Identifier eg. `foo`
    IDENTIFIER = :identifier

    # Keyword `false`
    FALSE = :false
    # Keyword `true`
    TRUE = :true
    # Keyword `nil`
    NIL = :nil
    # Keyword `if`
    IF = :if
    # Keyword `while`
    WHILE = :while
    # Keyword `return`
    RETURN = :return
    # Keyword `end`
    END_K = :end
    # Keyword `else`
    ELSE = :else
    # Keyword `self`
    SELF = :self

    # String containing all valid decimal digits
    DIGITS = '0123456789'
    # String containing all valid hexadecimal digits
    HEX_DIGITS = '0123456789abcdefABCDEF'

    # Set of all keywords
    KEYWORDS = T.let(
      Set.new(%w[false true nil if while return end else self]),
      T::Set[String],
    )

    # Human-readable names of all non-keyword token types.
    TYPE_NAMES = T.let(
      {
        NONE          => 'NONE',
        END_OF_FILE   => 'END_OF_FILE',
        ERROR         => 'ERROR',
        LPAREN        => '(',
        RPAREN        => ')',
        COMMA         => ',',
        SEMICOLON     => ';',
        NEWLINE       => 'NEWLINE',
        EQUAL         => '=',
        BANG          => '!',
        EQUAL_EQUAL   => '==',
        NOT_EQUAL     => '!=',
        GREATER       => '>',
        GREATER_EQUAL => '>=',
        LESS          => '<',
        LESS_EQUAL    => '<=',
        PLUS          => '+',
        MINUS         => '-',
        STAR          => '*',
        SLASH         => '/',
        FLOAT         => 'FLOAT',
        INTEGER       => 'INTEGER',
        STRING        => 'STRING',
        IDENTIFIER    => 'IDENTIFIER',
      }.freeze,
      T::Hash[Symbol, String],
    )
    private_constant :TYPE_NAMES

    class << self
      extend T::Sig

      # Converts a token type into a human-readable string.
      # Keyword types map to the keyword itself, unknown types
      # to `'<invalid>'`.
      sig { params(type: Symbol).returns(String) }
      def type_to_string(type)
        known = TYPE_NAMES[type]
        return known if known

        name = type.to_s
        KEYWORDS.include?(name) ? name : '<invalid>'
      end
    end

    # One of the token type constants of this class.
    sig { returns(Symbol) }
    attr_reader :type

    # Optional text carried by the token.
    sig { returns(T.nilable(String)) }
    attr_reader :value

    # Location of the token.
    sig { returns(Span) }
    attr_reader :span

    sig { params(type: Symbol, span: Span, value: T.nilable(String)).void }
    def initialize(type, span, value = nil)
      @type = type
      @span = span
      @value = value
    end

    # Tokens are equal when their type and value are equal;
    # the span does not take part in the comparison.
    sig { params(other: Object).returns(T::Boolean) }
    def ==(other)
      other.is_a?(Token) && type == other.type && value == other.value
    end

    # Debug representation; the value is left out when it is `nil`.
    sig { returns(String) }
    def inspect
      if value.nil?
        "Token(#{type.inspect}, #{span.inspect})"
      else
        "Token(#{type.inspect}, #{span.inspect}, #{value.inspect})"
      end
    end

    # Is this token `==` or `!=`?
    sig { returns(T::Boolean) }
    def equality_operator?
      @type == EQUAL_EQUAL || @type == NOT_EQUAL
    end

    # Is this token `+` or `-`?
    sig { returns(T::Boolean) }
    def additive_operator?
      @type == PLUS || @type == MINUS
    end

    # Is this token `*` or `/`?
    sig { returns(T::Boolean) }
    def multiplicative_operator?
      @type == STAR || @type == SLASH
    end

    # Is this token `>`, `>=`, `<` or `<=`?
    sig { returns(T::Boolean) }
    def comparison_operator?
      @type == GREATER || @type == GREATER_EQUAL || @type == LESS || @type == LESS_EQUAL
    end

    # Human-readable name of this token's type.
    sig { returns(String) }
    def type_name
      self.class.type_to_string(@type)
    end

    # Converts a token into a human-readable string.
    # Error and literal/identifier tokens are rendered from their value,
    # every other token is rendered as the name of its type.
    sig { returns(String) }
    def to_s
      case type
      when ERROR
        "<error: #{value}>"
      when FLOAT, INTEGER, IDENTIFIER
        value.to_s
      when STRING
        T.cast(value.inspect, String)
      else
        type_name
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module MiniRuby
  # Version of the miniruby gem.
  VERSION = '0.1.0'
end
data/lib/miniruby.rb ADDED
@@ -0,0 +1,72 @@
1
+ # typed: true
2
+ # frozen_string_literal: true
3
+
4
+ require 'sorbet-runtime'
5
+
6
+ # Contains the MiniRuby interpreter.
7
module MiniRuby
  # Error class defined in the MiniRuby namespace.
  class Error < StandardError
  end

  class << self
    extend T::Sig

    # Tokenize the MiniRuby source string.
    # Carries out lexical analysis and returns
    # an array of tokens (words).
    sig { params(source: String).returns(T::Array[Token]) }
    def lex(source)
      Lexer.lex(source)
    end

    # Parse the MiniRuby source.
    # Returns an AST (Abstract Syntax Tree) and a list of errors.
    sig { params(source: String).returns(Parser::Result) }
    def parse(source)
      Parser.parse(source)
    end

    # Disabled entry points, kept commented out:
    #
    # # Compile the MiniRuby source.
    # # Returns a chunk of compiled bytecode.
    # sig do
    #   params(
    #     source: String,
    #   ).returns(BytecodeFunction)
    # end
    # def compile(source)
    #   Compiler.compile_source(source)
    # end

    # # Interpret the MiniRuby source with the Virtual Machine.
    # # Returns the last computed value.
    # sig do
    #   params(
    #     source: String,
    #     stdout: IO,
    #     stdin: IO,
    #   ).returns(Object)
    # end
    # def interpret(source, stdout: $stdout, stdin: $stdin)
    #   VM.interpret(source, stdout:, stdin:)
    # end
  end
end
61
+
62
+ require_relative 'miniruby/version'
63
+ require_relative 'miniruby/position'
64
+ require_relative 'miniruby/span'
65
+ require_relative 'miniruby/token'
66
+ require_relative 'miniruby/lexer'
67
+ require_relative 'miniruby/ast'
68
+ require_relative 'miniruby/parser'
69
+ # require_relative 'miniruby/opcode'
70
+ # require_relative 'miniruby/bytecode_function'
71
+ # require_relative 'miniruby/compiler'
72
+ # require_relative 'miniruby/vm'