layo 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. data/LICENSE +26 -0
  2. data/README.mkd +103 -0
  3. data/Rakefile +21 -0
  4. data/UnicodeData.txt +23697 -0
  5. data/bin/layo +22 -0
  6. data/layo.gemspec +23 -0
  7. data/lib/layo.rb +11 -0
  8. data/lib/layo/ast.rb +5 -0
  9. data/lib/layo/ast/block.rb +13 -0
  10. data/lib/layo/ast/expression.rb +14 -0
  11. data/lib/layo/ast/node.rb +6 -0
  12. data/lib/layo/ast/program.rb +9 -0
  13. data/lib/layo/ast/statement.rb +10 -0
  14. data/lib/layo/interpreter.rb +360 -0
  15. data/lib/layo/lexer.rb +162 -0
  16. data/lib/layo/parser.rb +371 -0
  17. data/lib/layo/peekable.rb +31 -0
  18. data/lib/layo/runtime_error.rb +9 -0
  19. data/lib/layo/syntax_error.rb +14 -0
  20. data/lib/layo/tokenizer.rb +119 -0
  21. data/lib/layo/unexpected_token_error.rb +13 -0
  22. data/lib/layo/unicode.rb +23614 -0
  23. data/lib/layo/unknown_token_error.rb +7 -0
  24. data/spec/interpreter_spec.rb +52 -0
  25. data/spec/lexer_spec.rb +176 -0
  26. data/spec/parser_spec.rb +373 -0
  27. data/spec/source/basic/comments.lol +16 -0
  28. data/spec/source/basic/comments.out +2 -0
  29. data/spec/source/basic/line-continuation.lol +8 -0
  30. data/spec/source/basic/line-continuation.out +2 -0
  31. data/spec/source/basic/line-endings.lol +5 -0
  32. data/spec/source/basic/line-endings.out +3 -0
  33. data/spec/source/basic/minimal.lol +2 -0
  34. data/spec/source/casting/boolean.lol +8 -0
  35. data/spec/source/casting/boolean.out +5 -0
  36. data/spec/source/casting/float.lol +10 -0
  37. data/spec/source/casting/float.out +5 -0
  38. data/spec/source/casting/int.lol +9 -0
  39. data/spec/source/casting/int.out +4 -0
  40. data/spec/source/casting/nil.lol +9 -0
  41. data/spec/source/casting/nil.out +4 -0
  42. data/spec/source/casting/string.lol +5 -0
  43. data/spec/source/casting/string.out +2 -0
  44. data/spec/source/expressions/boolean.lol +30 -0
  45. data/spec/source/expressions/boolean.out +17 -0
  46. data/spec/source/expressions/cast.lol +28 -0
  47. data/spec/source/expressions/cast.out +20 -0
  48. data/spec/source/expressions/function.lol +24 -0
  49. data/spec/source/expressions/function.out +4 -0
  50. data/spec/source/expressions/math.lol +9 -0
  51. data/spec/source/expressions/math.out +7 -0
  52. data/spec/source/expressions/string.lol +20 -0
  53. data/spec/source/expressions/string.out +7 -0
  54. data/spec/source/statements/assignment.lol +8 -0
  55. data/spec/source/statements/assignment.out +3 -0
  56. data/spec/source/statements/cast.lol +11 -0
  57. data/spec/source/statements/cast.out +3 -0
  58. data/spec/source/statements/declaration.lol +9 -0
  59. data/spec/source/statements/declaration.out +2 -0
  60. data/spec/source/statements/expression.lol +10 -0
  61. data/spec/source/statements/expression.out +2 -0
  62. data/spec/source/statements/if_then_else.lol +42 -0
  63. data/spec/source/statements/if_then_else.out +3 -0
  64. data/spec/source/statements/input.in +1 -0
  65. data/spec/source/statements/input.lol +4 -0
  66. data/spec/source/statements/input.out +1 -0
  67. data/spec/source/statements/loop.lol +50 -0
  68. data/spec/source/statements/loop.out +20 -0
  69. data/spec/source/statements/print.lol +7 -0
  70. data/spec/source/statements/print.out +2 -0
  71. data/spec/source/statements/switch.lol +95 -0
  72. data/spec/source/statements/switch.out +12 -0
  73. data/spec/tokenizer_spec.rb +105 -0
  74. metadata +135 -0
data/lib/layo/lexer.rb ADDED
@@ -0,0 +1,162 @@
1
+ # encoding: UTF-8
2
+ module Layo
3
+ class Lexer
4
+ include Peekable
5
+ # Input stream. Must be an instance of IO class (File, StringIO)
6
+ attr_accessor :input
7
+ # Current line number (1-based) and position (0-based) of cursor
8
+ attr_reader :pos, :line_no
9
+
10
+ def initialize(io = nil)
11
+ self.input = io unless io.nil?
12
+ end
13
+
14
+ # Sets input stream and resets variables
15
+ def input=(io)
16
+ @input = io
17
+ reset
18
+ end
19
+
20
+ # Resets this lexer instance
21
+ def reset
22
+ @line_no, @last_lexeme = 0, ["\n"]
23
+ super
24
+ end
25
+
26
+ def space?(char)
27
+ char == ' ' || char == "\t"
28
+ end
29
+
30
+ # Tells whether there is a lexeme delimiter at position pos in current line
31
+ def lexeme_delimiter?(pos)
32
+ @line[pos] == '!' || @line[pos] == ',' ||
33
+ @line[pos] == "\n" || space?(@line[pos]) ||
34
+ @line[pos] == '…' || @line[pos, 3] == '...'
35
+ end
36
+
37
+ # Reads and returns next lexeme
38
+ def next_item
39
+ return @last_lexeme if @last_lexeme[0].nil?
40
+ while true
41
+ @line = next_line if @line_no.zero? || @pos > @line.length - 1
42
+ if @line.nil?
43
+ lexeme = [nil, @line_no, 1]
44
+ break
45
+ end
46
+
47
+ # Skip whitespaces
48
+ while space?(@line[@pos])
49
+ @pos += 1
50
+ end
51
+
52
+ # Skip triple dot characters (join lines)
53
+ if @line[@pos, 4] == "...\n" || @line[@pos, 2] == "…\n"
54
+ line_no, pos = @line_no, @pos + 1
55
+ @line, @pos = next_line, 0
56
+ if @line.nil? || @line.strip.empty?
57
+ raise SyntaxError.new(line_no, pos, 'Line continuation may not be followed by an empty line')
58
+ end
59
+ next
60
+ end
61
+
62
+ # Skip one line comments
63
+ if @line[@pos, 3] == 'BTW'
64
+ @pos = @line.length - 1
65
+ end
66
+ # and multiline ones
67
+ if @last_lexeme[0] == "\n" && @line[@pos, 4] == 'OBTW'
68
+ tldr_found, line_no, pos = false, @line_no, @pos + 1
69
+ while true
70
+ @line = next_line
71
+ break if @line.nil?
72
+ m = @line.chomp.match(/(^|\s+)TLDR\s*(,|$)/)
73
+ unless m.nil?
74
+ tldr_found = true
75
+ @pos = m.end(0)
76
+ break
77
+ end
78
+ end
79
+ unless tldr_found
80
+ raise SyntaxError.new(line_no, pos, 'Unterminated multiline comment')
81
+ end
82
+ next
83
+ end
84
+
85
+ if @line[@pos] == "\n" || @line[@pos] == '!'
86
+ # Handle newline and bang separately
87
+ lexeme = [@line[@pos], @line_no, @pos + 1]
88
+ @pos += 1
89
+ elsif @line[@pos] == ','
90
+ # Comma is a virtual newline
91
+ lexeme = ["\n", @line_no, @pos + 1]
92
+ @pos += 1
93
+ elsif @line[@pos] == '"'
94
+ # Strings begin with "
95
+ # Need to handle empty strings separately
96
+ if @line[@pos + 1] == '"'
97
+ string = '""'
98
+ else
99
+ m = @line.match(/([^:](?:::)*)"/, @pos + 1)
100
+ string = @line[@pos..m.end(0) - 1] unless m.nil?
101
+ end
102
+ # String must be followed by an allowed lexeme delimiter
103
+ if string.nil? || !lexeme_delimiter?(@pos + string.length)
104
+ raise SyntaxError.new(@line_no, @pos + 1, 'Unterminated string constant')
105
+ end
106
+ lexeme = [%Q["#{escape_string(string[1..-2])}"], @line_no, @pos + 1]
107
+ @pos = @pos + string.length
108
+ else
109
+ # Grab as much characters as we can until meeting lexeme delimiter
110
+ # Treat what we grabbed as a lexeme
111
+ seq, pos = '', @pos + 1
112
+ until lexeme_delimiter?(@pos)
113
+ seq += @line[@pos]
114
+ @pos += 1
115
+ end
116
+ lexeme = [seq, @line_no, pos]
117
+ end
118
+
119
+ break
120
+ end
121
+ @last_lexeme = lexeme
122
+ end
123
+
124
+ # Reads and returns next line from input stream. Converts newline
125
+ # character to \n
126
+ # returns nil upon reaching EOF
127
+ def next_line
128
+ return nil if @input.eof?
129
+ line, ch, @pos, @line_no = '', '', 0, @line_no + 1
130
+ until ch == "\r" || ch == "\n" || ch.nil?
131
+ ch = @input.getc
132
+ line += ch unless ch.nil?
133
+ end
134
+ if ch == "\r"
135
+ ch = @input.getc
136
+ @input.ungetc(ch) unless ch == "\n" || ch.nil?
137
+ end
138
+ line.chomp << "\n"
139
+ end
140
+
141
+ # Performs substitution of escape characters in string
142
+ def escape_string(str)
143
+ replacement = {
144
+ ':)' => "\n", ':>' => "\t", ':o' => "\a", ':"' => '"', '::' => ':'
145
+ }
146
+ str
147
+ .gsub(/:[\)>o":]/, replacement)
148
+ .gsub(/:\(([0-9a-fA-F]+)\)/) do |match|
149
+ $1.to_i(16).chr(Encoding::UTF_8)
150
+ end
151
+ .gsub(/:\[(.+?)\]/) do |match|
152
+ code = Unicode::DATA[$1]
153
+ if code
154
+ code.chr(Encoding::UTF_8)
155
+ else
156
+ $stderr.puts("Unknown Unicode normative name: #{$1}")
157
+ match
158
+ end
159
+ end
160
+ end
161
+ end
162
+ end
@@ -0,0 +1,371 @@
1
+ module Layo
2
+ class Parser
3
+ attr_accessor :tokenizer
4
+ attr_reader :functions
5
+
6
+ def initialize(tokenizer)
7
+ @tokenizer, @functions = tokenizer, {}
8
+ end
9
+
10
+ # Function declarations should be parsed first in order to properly
11
+ # parse argument list and allow calling functions before their definition.
12
+ # So this method should be called as the first pass before parsing begins
13
+ def parse_function_declarations
14
+ @functions = {}
15
+ @tokenizer.reset_peek
16
+ until (token = @tokenizer.peek)[:type] == :eof
17
+ if token[:type] == :how_duz_i
18
+ # Function name must follow
19
+ token = @tokenizer.peek
20
+ unless token[:type] == :identifier
21
+ raise UnexpectedTokenError, token
22
+ end
23
+ name = token[:data]
24
+ args = []
25
+ token = @tokenizer.peek
26
+ if token[:type] == :yr
27
+ # Function arguments must follow
28
+ begin
29
+ token = @tokenizer.peek
30
+ unless token[:type] == :identifier
31
+ raise UnexpectedTokenError, token
32
+ end
33
+ args << token[:data]
34
+ end while @tokenizer.peek[:type] == :an_yr
35
+ end
36
+ @tokenizer.unpeek
37
+ @functions[name] = args
38
+ # Newline must follow
39
+ token = @tokenizer.peek
40
+ unless token[:type] == :newline
41
+ raise UnexpectedTokenError, token
42
+ end
43
+ end
44
+ end
45
+ @tokenizer.reset_peek
46
+ end
47
+
48
+ def parse_program
49
+ parse_function_declarations
50
+ skip_newlines
51
+ expect_token(:hai)
52
+ version = expect_token(:float)[:data]
53
+ expect_token(:newline)
54
+ block = parse_block
55
+ expect_token(:kthxbye)
56
+ skip_newlines
57
+ expect_token(:eof)
58
+ Ast::Program.new(version, block)
59
+ end
60
+
61
+ alias_method :parse, :parse_program
62
+
63
+ def expect_token(*types)
64
+ token = @tokenizer.next
65
+ raise UnexpectedTokenError, token unless types.include?(token[:type])
66
+ token
67
+ end
68
+
69
+ def skip_newlines
70
+ while @tokenizer.peek[:type] == :newline
71
+ @tokenizer.next
72
+ end
73
+ @tokenizer.unpeek
74
+ end
75
+
76
+ def parse_block
77
+ statements = []
78
+ begin
79
+ skip_newlines
80
+ unless (name = next_statement).nil?
81
+ statements << parse_statement(name)
82
+ end
83
+ end until name.nil?
84
+ Ast::Block.new(statements)
85
+ end
86
+
87
+ def next_statement
88
+ return 'assignment' if @tokenizer.try(:identifier, :r)
89
+ return 'break' if @tokenizer.try(:gtfo)
90
+ return 'cast' if @tokenizer.try(:identifier, :is_now_a)
91
+ return 'condition' if @tokenizer.try(:o_rly?)
92
+ return 'declaration' if @tokenizer.try(:i_has_a)
93
+ return 'function' if @tokenizer.try(:how_duz_i)
94
+ return 'input' if @tokenizer.try(:gimmeh)
95
+ return 'loop' if @tokenizer.try(:im_in_yr)
96
+ return 'print' if @tokenizer.try(:visible)
97
+ return 'return' if @tokenizer.try(:found_yr)
98
+ return 'switch' if @tokenizer.try(:wtf?)
99
+ return 'expression' if !next_expression.nil?
100
+ nil
101
+ end
102
+
103
+ def parse_statement(name)
104
+ token = @tokenizer.peek
105
+ @tokenizer.unpeek
106
+ statement = send("parse_#{name}_statement".to_sym)
107
+ expect_token(:newline)
108
+ statement.line = token[:line]
109
+ statement
110
+ end
111
+
112
+ def parse_assignment_statement
113
+ attrs = { identifier: expect_token(:identifier)[:data] }
114
+ expect_token(:r)
115
+ attrs[:expression] = parse_expression
116
+ Ast::Statement.new('assignment', attrs)
117
+ end
118
+
119
+ def parse_break_statement
120
+ expect_token(:gtfo)
121
+ Ast::Statement.new('break')
122
+ end
123
+
124
+ def parse_cast_statement
125
+ attrs = { identifier: expect_token(:identifier)[:data] }
126
+ expect_token(:is_now_a)
127
+ attrs[:to] = expect_token(:noob, :troof, :numbr, :numbar, :yarn)[:type]
128
+ Ast::Statement.new('cast', attrs)
129
+ end
130
+
131
+ def parse_condition_statement
132
+ expect_token(:o_rly?)
133
+ expect_token(:newline)
134
+ expect_token(:ya_rly)
135
+ expect_token(:newline)
136
+ attrs = { then: parse_block, elseif: [] }
137
+ while @tokenizer.peek[:type] == :mebbe
138
+ expect_token(:mebbe)
139
+ condition = parse_expression
140
+ expect_token(:newline)
141
+ attrs[:elseif] << { condition: condition, block: parse_block }
142
+ end
143
+ @tokenizer.unpeek
144
+ if @tokenizer.peek[:type] == :no_wai
145
+ expect_token(:no_wai)
146
+ expect_token(:newline)
147
+ attrs[:else] = parse_block
148
+ end
149
+ @tokenizer.unpeek
150
+ expect_token(:oic)
151
+ Ast::Statement.new('condition', attrs)
152
+ end
153
+
154
+ def parse_declaration_statement
155
+ expect_token(:i_has_a)
156
+ attrs = { identifier: expect_token(:identifier)[:data] }
157
+ if @tokenizer.peek[:type] == :itz
158
+ @tokenizer.next
159
+ attrs[:initialization] = parse_expression
160
+ end
161
+ @tokenizer.unpeek
162
+ Ast::Statement.new('declaration', attrs)
163
+ end
164
+
165
+ def parse_expression_statement
166
+ attrs = { expression: parse_expression }
167
+ Ast::Statement.new('expression', attrs)
168
+ end
169
+
170
+ def parse_function_statement
171
+ expect_token(:how_duz_i)
172
+ name = expect_token(:identifier)[:data]
173
+ if @functions.has_key?(name)
174
+ # Function definition was parsed in the first pass
175
+ until @tokenizer.next[:type] == :newline; end
176
+ args = @functions[name]
177
+ else
178
+ # Parse argument list as usual
179
+ args = []
180
+ if @tokenizer.peek[:type] == :yr
181
+ begin
182
+ @tokenizer.next
183
+ args << expect_token(:identifier)[:data]
184
+ end while @tokenizer.peek[:type] == :an_yr
185
+ end
186
+ @tokenizer.unpeek
187
+ expect_token(:newline)
188
+ @functions[name] = args
189
+ end
190
+ block = parse_block
191
+ expect_token(:if_u_say_so)
192
+ Ast::Statement.new('function', { name: name, args: args, block: block })
193
+ end
194
+
195
+ def parse_input_statement
196
+ expect_token(:gimmeh)
197
+ attrs = { identifier: expect_token(:identifier)[:data] }
198
+ Ast::Statement.new('input', attrs)
199
+ end
200
+
201
+ def parse_loop_statement
202
+ loop_start = expect_token(:im_in_yr)
203
+ label_begin = expect_token(:identifier)[:data]
204
+ attrs = {}
205
+ if [:uppin, :nerfin, :identifier].include?(@tokenizer.peek[:type])
206
+ attrs[:op] = expect_token(:uppin, :nerfin, :identifier)
207
+ expect_token(:yr)
208
+ attrs[:op] = attrs[:op][:type] == :identifier ? attrs[:op][:data] :
209
+ attrs[:op][:type]
210
+ attrs[:counter] = expect_token(:identifier)[:data]
211
+ end
212
+ @tokenizer.unpeek
213
+ if [:til, :wile].include?(@tokenizer.peek[:type])
214
+ attrs[:guard] = { type: expect_token(:til, :wile)[:type] }
215
+ attrs[:guard][:expression] = parse_expression
216
+ end
217
+ @tokenizer.unpeek
218
+ attrs[:block] = parse_block
219
+ expect_token(:im_outta_yr)
220
+ label_end = expect_token(:identifier)[:data]
221
+ unless label_begin == label_end
222
+ raise SyntaxError.new(
223
+ loop_start[:line], loop_start[:pos],
224
+ "Loop labels don't match: '#{label_begin}' and '#{label_end}'"
225
+ )
226
+ end
227
+ attrs[:label] = label_begin
228
+ Ast::Statement.new('loop', attrs)
229
+ end
230
+
231
+ def parse_print_statement
232
+ expect_token(:visible)
233
+ attrs = { expressions: [parse_expression] }
234
+ until (name = next_expression).nil?
235
+ attrs[:expressions] << parse_expression(name)
236
+ end
237
+ attrs[:suppress] = false
238
+ if @tokenizer.peek[:type] == :exclamation
239
+ @tokenizer.next
240
+ attrs[:suppress] = true
241
+ end
242
+ @tokenizer.unpeek
243
+ Ast::Statement.new('print', attrs)
244
+ end
245
+
246
+ def parse_return_statement
247
+ expect_token(:found_yr)
248
+ attrs = { expression: parse_expression }
249
+ Ast::Statement.new('return', attrs)
250
+ end
251
+
252
+ def parse_switch_statement
253
+ expect_token(:wtf?)
254
+ expect_token(:newline)
255
+ parse_case = lambda do
256
+ expect_token(:omg)
257
+ expression = parse_expression('constant')
258
+ expect_token(:newline)
259
+ { expression: expression, block: parse_block }
260
+ end
261
+ attrs = { cases: [parse_case.call] }
262
+ while @tokenizer.peek[:type] == :omg
263
+ attrs[:cases] << parse_case.call
264
+ end
265
+ @tokenizer.unpeek
266
+ if @tokenizer.peek[:type] == :omgwtf
267
+ expect_token(:omgwtf)
268
+ expect_token(:newline)
269
+ attrs[:default] = parse_block
270
+ end
271
+ @tokenizer.unpeek
272
+ expect_token(:oic)
273
+ Ast::Statement.new('switch', attrs)
274
+ end
275
+
276
+ # Returns internal name of the next expression
277
+ def next_expression
278
+ return 'binary' if @tokenizer.try([
279
+ :sum_of, :diff_of, :produkt_of, :quoshunt_of, :mod_of, :biggr_of,
280
+ :smallr_of, :both_of, :either_of, :won_of, :both_saem, :diffrint
281
+ ])
282
+ return 'cast' if @tokenizer.try(:maek)
283
+ return 'constant' if @tokenizer.try([:boolean, :integer, :float, :string])
284
+ return 'identifier' if @tokenizer.try(:identifier)
285
+ return 'nary' if @tokenizer.try([:all_of, :any_of, :smoosh])
286
+ return 'unary' if @tokenizer.try(:not)
287
+ nil
288
+ end
289
+
290
+ def parse_expression(name = nil)
291
+ token = @tokenizer.peek
292
+ @tokenizer.unpeek
293
+ name = next_expression unless name
294
+ unless name
295
+ raise SyntaxError.new(token[:line], token[:pos], 'Expected expression')
296
+ end
297
+ send("parse_#{name}_expression".to_sym)
298
+ end
299
+
300
+ def parse_binary_expression
301
+ attrs = {
302
+ operator: expect_token(
303
+ :sum_of, :diff_of, :produkt_of, :quoshunt_of, :mod_of, :biggr_of,
304
+ :smallr_of, :both_of, :either_of, :won_of, :both_saem, :diffrint
305
+ )[:type]
306
+ }
307
+ attrs[:left] = parse_expression
308
+ @tokenizer.next if @tokenizer.peek[:type] == :an
309
+ @tokenizer.unpeek
310
+ attrs[:right] = parse_expression
311
+ Ast::Expression.new('binary', attrs)
312
+ end
313
+
314
+ def parse_cast_expression
315
+ expect_token(:maek)
316
+ attrs = { being_casted: parse_expression }
317
+ expect_token(:a)
318
+ attrs[:to] = expect_token(:noob, :troof, :numbr, :numbar, :yarn)[:type]
319
+ Ast::Expression.new('cast', attrs)
320
+ end
321
+
322
+ def parse_constant_expression
323
+ token = expect_token(:boolean, :integer, :float, :string)
324
+ Ast::Expression.new('constant', { vtype: token[:type], value: token[:data] })
325
+ end
326
+
327
+ # Identifier expression represents two types of expressions:
328
+ # variable expression: returns value of variable
329
+ # function call expression: returns value of function call
330
+ def parse_identifier_expression
331
+ name = expect_token(:identifier)[:data]
332
+ begin
333
+ function = self.functions.fetch(name)
334
+ # Function call
335
+ attrs = { name: name, parameters: [] }
336
+ function.size.times do |c|
337
+ attrs[:parameters] << parse_expression
338
+ end
339
+ return Ast::Expression.new('function', attrs)
340
+ rescue KeyError
341
+ # Variable name
342
+ return Ast::Expression.new('variable', name: name)
343
+ end
344
+ end
345
+
346
+ def parse_nary_expression
347
+ attrs = { operator: expect_token(:all_of, :any_of, :smoosh)[:type] }
348
+ attrs[:expressions] = [parse_expression]
349
+ while true
350
+ @tokenizer.next if @tokenizer.peek[:type] == :an
351
+ @tokenizer.unpeek
352
+ name = next_expression
353
+ if name.nil? then break else attrs[:expressions] << parse_expression(name) end
354
+ end
355
+ # We need either MKAY or Newline here, but
356
+ # should consume only MKAY if present
357
+ token = @tokenizer.peek
358
+ unless [:mkay, :newline].include?(token[:type])
359
+ raise UnexpectedTokenError, token
360
+ end
361
+ @tokenizer.next if token[:type] == :mkay
362
+ @tokenizer.unpeek
363
+ Ast::Expression.new('nary', attrs)
364
+ end
365
+
366
+ def parse_unary_expression
367
+ expect_token(:not)
368
+ Ast::Expression.new('unary', { expression: parse_expression } )
369
+ end
370
+ end
371
+ end