miniruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +77 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE +21 -0
- data/README.md +332 -0
- data/Rakefile +12 -0
- data/lib/miniruby/ast.rb +325 -0
- data/lib/miniruby/lexer.rb +380 -0
- data/lib/miniruby/parser/result.rb +43 -0
- data/lib/miniruby/parser.rb +198 -0
- data/lib/miniruby/position.rb +31 -0
- data/lib/miniruby/span.rb +41 -0
- data/lib/miniruby/token.rb +300 -0
- data/lib/miniruby/version.rb +5 -0
- data/lib/miniruby.rb +72 -0
- metadata +75 -0
data/lib/miniruby/ast.rb
ADDED
@@ -0,0 +1,325 @@
|
|
1
|
+
# typed: strong
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
module MiniRuby
|
5
|
+
# Contains the definitions of all AST (Abstract Syntax Tree) nodes.
|
6
|
+
# AST is the data structure that is returned by the parser.
|
7
|
+
module AST
|
8
|
+
# A string that represents a single level of indentation
|
9
|
+
# in S-expressions
|
10
|
+
INDENT_UNIT = ' '
|
11
|
+
|
12
|
+
# Abstract class representing an AST node.
|
13
|
+
# Common base class of every AST node. It is marked abstract, so only
# subclasses that implement `to_s` and `inspect` are meant to be built.
class Node
  extend T::Sig
  extend T::Helpers

  abstract!

  # The span attached to this node; it can be read and reassigned.
  sig { returns(Span) }
  attr_accessor :span

  # Stores the given span, falling back to `Span::ZERO`.
  sig { params(span: Span).void }
  def initialize(span: Span::ZERO)
    @span = span
  end

  # Default equality: `other` only has to be an instance of the
  # receiver's class (or of a subclass). The span is not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    own_class = self.class
    other.is_a?(own_class)
  end

  # Ruby-like rendering of the node; implemented by subclasses.
  sig { abstract.params(indent: Integer).returns(String) }
  def to_s(indent = 0); end

  # S-expression rendering of the node; implemented by subclasses.
  sig { abstract.params(indent: Integer).returns(String) }
  def inspect(indent = 0); end
end
|
40
|
+
|
41
|
+
# Represents a program
|
42
|
+
# Root of the tree: holds the list of statements that make up a program.
class ProgramNode < Node
  # The statements of the program, in the order they were given.
  sig { returns(T::Array[StatementNode]) }
  attr_reader :statements

  sig { params(statements: T::Array[StatementNode], span: Span).void }
  def initialize(statements:, span: Span::ZERO)
    @span = span
    @statements = statements
  end

  # Programs are equal when their statement lists are equal.
  # Spans are not part of the comparison.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    return false unless other.is_a?(ProgramNode)

    @statements == other.statements
  end

  # Concatenates the Ruby-like rendering of every statement,
  # passing `indent` through to each of them.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    # `+''` gives a mutable UTF-8 buffer. A bare `String.new` is ASCII-8BIT,
    # which made the result binary-encoded whenever the output was ASCII-only.
    @statements.each_with_object(+'') do |stmt, buffer|
      buffer << stmt.to_s(indent)
    end
  end

  # Renders `(program ...)` with every statement on its own line,
  # one indentation level deeper than the program itself.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    # Mutable UTF-8 buffer (see `to_s` for why `String.new` is avoided).
    buff = +''

    buff << "#{INDENT_UNIT * indent}(program"
    @statements.each do |stmt|
      buff << "\n" << stmt.inspect(indent + 1)
    end
    buff << ')'
  end
end
|
82
|
+
|
83
|
+
# Represents a single statement (line) of code
|
84
|
+
# Abstract parent of all statement nodes. It adds nothing to `Node`
# besides being declared abstract itself.
class StatementNode < Node
  abstract!
end
|
87
|
+
|
88
|
+
# Represents a statement with an expression like `2 + 3 - 5;`
|
89
|
+
# A statement that consists of a single expression, like `2 + 3 - 5;`.
class ExpressionStatementNode < StatementNode
  # The expression wrapped by this statement.
  sig { returns(ExpressionNode) }
  attr_reader :expression

  sig { params(expression: ExpressionNode, span: Span).void }
  def initialize(expression:, span: Span::ZERO)
    @span = span
    @expression = expression
  end

  # Equal when the wrapped expressions are equal; spans are not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    return false unless other.is_a?(ExpressionStatementNode)

    @expression == other.expression
  end

  # Indentation, then the expression rendered with its default indent,
  # then a trailing newline.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    "#{INDENT_UNIT * indent}#{@expression}\n"
  end

  # Renders `(expr_stmt ...)` with the expression on the next line,
  # one indentation level deeper.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    # `+''` gives a mutable UTF-8 buffer. A bare `String.new` is ASCII-8BIT,
    # which made the result binary-encoded whenever the output was ASCII-only.
    buff = +''
    buff << "#{INDENT_UNIT * indent}(expr_stmt"
    buff << "\n" << @expression.inspect(indent + 1)
    buff << ')'
  end
end
|
120
|
+
|
121
|
+
# Represents an expression like `2 + 3`
|
122
|
+
# that can be a part of a larger expression/statement like `2 + 3 - 5`
|
123
|
+
# Abstract parent of all expression nodes. It adds nothing to `Node`
# besides being declared abstract itself.
class ExpressionNode < Node
  abstract!
end
|
126
|
+
|
127
|
+
# Represents an invalid node
|
128
|
+
# Expression node that wraps a token which could not be turned
# into a valid node.
class InvalidNode < ExpressionNode
  # The token carried by this node.
  sig { returns(Token) }
  attr_reader :token

  sig { params(token: Token, span: Span).void }
  def initialize(token:, span: Span::ZERO)
    @token = token
    @span = span
  end

  # Equal when both nodes carry equal tokens; spans are not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    other.is_a?(InvalidNode) && @token == other.token
  end

  # Renders the token's `to_s` wrapped in an `<invalid: ...>` marker.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}<invalid: `#{token}`>"
  end

  # Renders `(invalid ...)` around the token's `inspect` output.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}(invalid #{token.inspect})"
  end
end
|
155
|
+
|
156
|
+
# Represents a false literal eg. `false`
|
157
|
+
# Node for the `false` literal. It defines no state of its own.
class FalseLiteralNode < ExpressionNode
  # The word `false` preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}false"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}false"
  end
end
|
168
|
+
|
169
|
+
# Represents a true literal eg. `true`
|
170
|
+
# Node for the `true` literal. It defines no state of its own.
class TrueLiteralNode < ExpressionNode
  # The word `true` preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}true"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}true"
  end
end
|
181
|
+
|
182
|
+
# Represents a nil literal eg. `nil`
|
183
|
+
# Node for the `nil` literal. It defines no state of its own.
class NilLiteralNode < ExpressionNode
  # The word `nil` preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}nil"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}nil"
  end
end
|
194
|
+
|
195
|
+
# Represents a self literal eg. `self`
|
196
|
+
# Node for the `self` literal. It defines no state of its own.
class SelfLiteralNode < ExpressionNode
  # The word `self` preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}self"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}self"
  end
end
|
207
|
+
|
208
|
+
# Represents a float literal eg. `123.5`
|
209
|
+
# Node for a float literal such as `123.5`. The literal is kept as a String.
class FloatLiteralNode < ExpressionNode
  # The literal's text.
  sig { returns(String) }
  attr_reader :value

  sig { params(value: String, span: Span).void }
  def initialize(value:, span: Span::ZERO)
    @value = value
    @span = span
  end

  # Equal when both nodes hold the same text; spans are not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    other.is_a?(FloatLiteralNode) && @value == other.value
  end

  # The literal's text preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}#{value}"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}#{value}"
  end
end
|
236
|
+
|
237
|
+
# Represents an integer literal eg. `123`
|
238
|
+
# Node for an integer literal such as `123`. The literal is kept as a String.
class IntegerLiteralNode < ExpressionNode
  # The literal's text.
  sig { returns(String) }
  attr_reader :value

  sig { params(value: String, span: Span).void }
  def initialize(value:, span: Span::ZERO)
    @value = value
    @span = span
  end

  # Equal when both nodes hold the same text; spans are not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    other.is_a?(IntegerLiteralNode) && @value == other.value
  end

  # The literal's text preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}#{value}"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}#{value}"
  end
end
|
265
|
+
|
266
|
+
# Represents a string literal eg. `"foo"`
|
267
|
+
# Node for a string literal such as `"foo"`.
class StringLiteralNode < ExpressionNode
  # The string held by the literal.
  sig { returns(String) }
  attr_reader :value

  sig { params(value: String, span: Span).void }
  def initialize(value:, span: Span::ZERO)
    @value = value
    @span = span
  end

  # Equal when both nodes hold equal strings; spans are not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    other.is_a?(StringLiteralNode) && @value == other.value
  end

  # The value rendered through `String#inspect` (quoted and escaped),
  # preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    quoted = value.inspect
    "#{padding}#{quoted}"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    quoted = value.inspect
    "#{padding}#{quoted}"
  end
end
|
294
|
+
|
295
|
+
# Represents an identifier like `a`, `foo`
|
296
|
+
# Node for an identifier such as `a` or `foo`.
class IdentifierNode < ExpressionNode
  # The identifier's name.
  sig { returns(String) }
  attr_reader :value

  sig { params(value: String, span: Span).void }
  def initialize(value:, span: Span::ZERO)
    @value = value
    @span = span
  end

  # Equal when both nodes hold the same name; spans are not compared.
  sig { params(other: Object).returns(T::Boolean) }
  def ==(other)
    other.is_a?(IdentifierNode) && @value == other.value
  end

  # The name preceded by `indent` indentation units.
  sig { override.params(indent: Integer).returns(String) }
  def to_s(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}#{@value}"
  end

  # S-expression form; produces the same text as `to_s`.
  sig { override.params(indent: Integer).returns(String) }
  def inspect(indent = 0)
    padding = INDENT_UNIT * indent
    "#{padding}#{@value}"
  end
end
|
323
|
+
|
324
|
+
end
|
325
|
+
end
|
@@ -0,0 +1,380 @@
|
|
1
|
+
# typed: strict
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative 'token'
|
5
|
+
|
6
|
+
module MiniRuby
|
7
|
+
# A lexical analyzer (tokenizer) for MiniRuby
|
8
|
+
class Lexer
|
9
|
+
extend T::Sig
|
10
|
+
extend T::Generic
|
11
|
+
include Enumerable
|
12
|
+
|
13
|
+
# Type parameter for `Enumerable`
|
14
|
+
# Declares the type that the lexer returns for tokens
|
15
|
+
Elem = type_member { { fixed: Token } }
|
16
|
+
|
17
|
+
class << self
|
18
|
+
extend T::Sig
|
19
|
+
|
20
|
+
sig { params(source: String).returns(T::Array[Token]) }
|
21
|
+
# Convenience entry point: builds a lexer for `source` and collects
# everything it enumerates into an array.
def lex(source)
  lexer = new(source)
  lexer.to_a
end
|
24
|
+
end
|
25
|
+
|
26
|
+
sig { params(source: String).void }
|
27
|
+
# Keeps the source text and places both cursors at offset 0.
def initialize(source)
  @source = source

  # offset of the next character to be read
  @cursor = T.let(0, Integer)
  # offset where the lexeme currently being scanned begins
  @start_cursor = T.let(0, Integer)
end
|
35
|
+
|
36
|
+
sig { returns(Token) }
|
37
|
+
# Scans and returns the next token. Once the cursor has reached the end
# of the source, an END_OF_FILE token is returned instead; its span is
# built from two `Position.new(0)` values.
def next
  return scan_token if more_tokens?

  eof_span = Span.new(Position.new(0), Position.new(0))
  Token.new(Token::END_OF_FILE, eof_span)
end
|
42
|
+
|
43
|
+
sig { override.params(block: T.nilable(T.proc.params(arg0: Token).void)).returns(T.untyped) }
|
44
|
+
# Calls the block with every token up to, but not including, the first
# END_OF_FILE token, then returns the lexer itself.
# Without a block an enumerator for this method is returned.
def each(&block)
  return enum_for(T.must(__method__)) if block.nil?

  loop do
    current = self.next
    break if current.type == Token::END_OF_FILE

    block.call(current)
  end

  self
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
sig { returns(T::Boolean) }
|
60
|
+
# True while the cursor has not yet reached the end of the source.
def more_tokens?
  source_length = @source.length
  @cursor < source_length
end
|
63
|
+
|
64
|
+
sig { params(type: Symbol).returns(Token) }
|
65
|
+
# Builds a token of `type` whose value is the text consumed so far
# for the current lexeme.
def token_with_consumed_value(type)
  consumed = token_value
  token(type, consumed)
end
|
68
|
+
|
69
|
+
sig { params(type: Symbol, value: T.nilable(String)).returns(Token) }
|
70
|
+
# Builds a token of `type` spanning from the start of the current lexeme
# to the character just before the cursor, then marks the cursor as the
# start of the next lexeme.
def token(type, value = nil)
  first = Position.new(@start_cursor)
  last = Position.new(@cursor - 1)
  lexeme_span = Span.new(first, last)

  @start_cursor = @cursor
  Token.new(type, lexeme_span, value)
end
|
75
|
+
|
76
|
+
# Returns the current token value.
|
77
|
+
sig { returns(String) }
|
78
|
+
# Source text from the start of the current lexeme up to, but not
# including, the cursor.
def token_value
  lexeme = @source[@start_cursor...@cursor]
  T.must(lexeme)
end
|
81
|
+
|
82
|
+
sig { returns([String, T::Boolean]) }
|
83
|
+
# Consumes one character. Returns `[char, true]` on success, or
# `['', false]` (leaving the cursor untouched) when the source is exhausted.
def advance_char
  if more_tokens?
    consumed = next_char
    @cursor += 1
    [consumed, true]
  else
    ['', false]
  end
end
|
91
|
+
|
92
|
+
sig { returns(String) }
|
93
|
+
# The character under the cursor. `T.must` rejects the `nil` that
# indexing past the end of the source would produce.
def next_char
  current = @source[@cursor]
  T.must(current)
end
|
96
|
+
|
97
|
+
# Gets the next UTF-8 encoded character
|
98
|
+
# without incrementing the cursor.
|
99
|
+
sig { returns(String) }
|
100
|
+
# Looks at the character under the cursor without consuming it.
# Returns an empty string when the source is exhausted.
def peek_char
  more_tokens? ? next_char : ''
end
|
106
|
+
|
107
|
+
# Advance the next `n` characters
|
108
|
+
sig { params(n: Integer).returns(T::Boolean) }
|
109
|
+
# Consumes up to `n` characters, stopping at the first one that cannot
# be consumed. Returns true only when all `n` were consumed.
def advance_chars(n)
  n.times.all? do
    _, consumed = advance_char
    consumed
  end
end
|
117
|
+
|
118
|
+
# Checks if the given character matches
|
119
|
+
# the next UTF-8 encoded character in source code.
|
120
|
+
# If they match, the cursor gets incremented.
|
121
|
+
sig { params(char: String).returns(T::Boolean) }
|
122
|
+
# Consumes the upcoming character when it equals `char` and reports
# whether it did so. Nothing is consumed on a mismatch or at the end
# of the source.
def match_char(char)
  return false unless more_tokens? && peek_char == char

  advance_char
  true
end
|
132
|
+
|
133
|
+
# Consumes the next character if it's from the valid set.
|
134
|
+
sig { params(valid_chars: String).returns(T::Boolean) }
|
135
|
+
# Consumes the upcoming character when `valid_chars` contains it and
# reports whether it did so. Nothing is consumed otherwise.
def match_chars(valid_chars)
  return false unless more_tokens?

  upcoming = peek_char
  return false if upcoming == '' || !valid_chars.include?(upcoming)

  advance_char
  true
end
|
146
|
+
|
147
|
+
# Rewinds the cursor back n chars.
|
148
|
+
sig { params(n: Integer).void }
|
149
|
+
# Moves the cursor `n` characters back. No lower bound is enforced here.
def backup_chars(n)
  @cursor = @cursor - n
end
|
152
|
+
|
153
|
+
# Skips the current accumulated token.
|
154
|
+
sig { void }
|
155
|
+
# Drops whatever has been consumed for the current lexeme by moving
# the lexeme start up to the cursor.
def skip_token
  resume_at = @cursor
  @start_cursor = resume_at
end
|
158
|
+
|
159
|
+
sig { returns(Token) }
|
160
|
+
# Reads characters until one complete token has been recognised and
# returns it. Spaces, carriage returns and tabs are skipped; a newline is
# a token of its own. Characters that start no known token produce an
# ERROR token.
def scan_token
  loop do
    char, ok = advance_char
    return token(Token::END_OF_FILE) unless ok

    case char
    when ',' then return token(Token::COMMA)
    when ';' then return token(Token::SEMICOLON)
    when '(' then return token(Token::LPAREN)
    when ')' then return token(Token::RPAREN)
    # one- or two-character operators: a following `=` selects the longer form
    when '!' then return token(match_char('=') ? Token::NOT_EQUAL : Token::BANG)
    when '=' then return token(match_char('=') ? Token::EQUAL_EQUAL : Token::EQUAL)
    when '>' then return token(match_char('=') ? Token::GREATER_EQUAL : Token::GREATER)
    when '<' then return token(match_char('=') ? Token::LESS_EQUAL : Token::LESS)
    when '+' then return token(Token::PLUS)
    when '-' then return token(Token::MINUS)
    when '*' then return token(Token::STAR)
    when '/' then return token(Token::SLASH)
    when '"' then return scan_string
    when "\n" then return token(Token::NEWLINE)
    when ' ', "\r", "\t"
      # insignificant whitespace: forget it and keep scanning
      skip_token
    else
      return scan_identifier if char.match?(/[[:alpha:]]/)
      return scan_number(char) if char.match?(/\d/)

      return token(Token::ERROR, "unexpected char `#{char}`")
    end
  end
end
|
216
|
+
|
217
|
+
sig { params(char: String).returns(T::Boolean) }
|
218
|
+
# True when `char` contains a letter, a digit or an underscore,
# i.e. a character allowed inside an identifier.
def identifier_char?(char)
  /[[:alpha:][:digit:]_]/.match?(char)
end
|
221
|
+
|
222
|
+
sig { returns(Token) }
|
223
|
+
# Consumes the rest of an identifier. A word listed in `Token::KEYWORDS`
# becomes a token whose type is the word itself as a symbol (no value);
# anything else becomes an IDENTIFIER token carrying the word.
def scan_identifier
  loop do
    break unless identifier_char?(peek_char)

    advance_char
  end

  word = token_value
  if Token::KEYWORDS.include?(word)
    token(word.to_sym)
  else
    token(Token::IDENTIFIER, word)
  end
end
|
231
|
+
|
232
|
+
sig { void }
|
233
|
+
# Consumes characters for as long as the upcoming one is listed in
# `Token::DIGITS`; stops at the first other character or at the end
# of the source.
def consume_digits
  loop do
    upcoming = peek_char
    break if upcoming.empty?
    break unless Token::DIGITS.include?(upcoming)

    _, consumed = advance_char
    break unless consumed
  end
end
|
242
|
+
|
243
|
+
# Checks if the next `n` characters are from the valid set.
|
244
|
+
sig { params(valid_chars: String, n: Integer).returns(T::Boolean) }
|
245
|
+
# Pure lookahead: reports whether each of the next `n` characters is
# contained in `valid_chars`. The cursor is always left where it was
# when the method was called.
#
# Only the characters that were actually consumed during the check are
# given back. Rewinding by a fixed `n` after a partial or failed match
# would push the cursor behind its starting point (possibly below zero),
# so already-scanned input would be read again.
def accept_chars(valid_chars, n)
  consumed = 0
  n.times do
    break unless match_chars(valid_chars)

    consumed += 1
  end

  backup_chars(consumed)

  consumed == n
end
|
258
|
+
|
259
|
+
sig { params(init_char: String).returns(Token) }
|
260
|
+
# Scans the remainder of a number literal whose first digit, `init_char`,
# has already been consumed. Returns an INTEGER token, a FLOAT token (when
# a fraction and/or an exponent is present) or an ERROR token.
def scan_number(init_char)
  # A `0` directly followed by another digit is rejected.
  # NOTE(review): the message says "trailing" although the check fires on a
  # leading zero - confirm the intended wording before changing it.
  if init_char == '0' && accept_chars(Token::DIGITS, 1)
    consume_digits
    return token(Token::ERROR, 'illegal trailing zero in number literal')
  end

  consume_digits
  is_float = false

  # optional fraction: a dot that must be followed by at least one digit
  if match_char('.')
    is_float = true
    upcoming = peek_char
    return token(Token::ERROR, 'unexpected EOF') if upcoming == ''

    unless Token::DIGITS.include?(upcoming)
      return token(Token::ERROR, "unexpected char in number literal: `#{upcoming}`")
    end

    consume_digits
  end

  # optional exponent: `e`/`E`, an optional sign, then at least one digit
  if match_char('e') || match_char('E')
    is_float = true
    match_char('+') || match_char('-')
    upcoming = peek_char
    return token(Token::ERROR, 'unexpected EOF') if upcoming == ''

    unless Token::DIGITS.include?(upcoming)
      return token(Token::ERROR, "unexpected char in number literal: `#{upcoming}`")
    end

    consume_digits
  end

  token_with_consumed_value(is_float ? Token::FLOAT : Token::INTEGER)
end
|
321
|
+
|
322
|
+
sig { void }
|
323
|
+
# Consumes characters up to and including the next double quote,
# or up to the end of the source when there is none.
def swallow_rest_of_the_string
  loop do
    ch, consumed = advance_char
    break unless consumed
    break if ch == '"'
  end
end
|
330
|
+
|
331
|
+
sig { returns(Token) }
|
332
|
+
# Scans a string literal whose opening quote has already been consumed.
# Returns a STRING token carrying the unescaped content, or an ERROR token
# for an unterminated literal or an invalid escape sequence.
#
# Recognised escapes: `\"`, `\\`, `\/`, `\b`, `\f`, `\n`, `\r`, `\t` and
# `\u` followed by exactly four hex digits.
def scan_string
  # `+''` gives a mutable UTF-8 buffer. A bare `String.new` is ASCII-8BIT,
  # which made ASCII-only string values binary-encoded.
  value_buffer = +''
  loop do
    char, ok = advance_char
    return token(Token::ERROR, 'unterminated string literal') unless ok
    return token(Token::STRING, value_buffer) if char == '"'

    # ordinary character: copy it verbatim
    if char != '\\'
      value_buffer << char
      next
    end

    # escape sequence: the character after the backslash selects it
    char, ok = advance_char
    return token(Token::ERROR, 'unterminated string literal') unless ok

    case char
    when '"', '\\', '/'
      # these escapes stand for the character itself
      value_buffer << char
    when 'b'
      value_buffer << "\b"
    when 'f'
      value_buffer << "\f"
    when 'n'
      value_buffer << "\n"
    when 'r'
      value_buffer << "\r"
    when 't'
      value_buffer << "\t"
    when 'u'
      unless accept_chars(Token::HEX_DIGITS, 4)
        swallow_rest_of_the_string
        return token(Token::ERROR, 'invalid unicode escape')
      end

      # the four hex digits were only looked at above; consume them now
      advance_chars(4)
      last4 = T.must @source[@cursor - 4...@cursor]
      value_buffer << [last4.hex].pack('U')
    else
      swallow_rest_of_the_string
      return token(Token::ERROR, "invalid escape `\\#{char}`")
    end
  end
end
|
379
|
+
end
|
380
|
+
end
|