rubasteme 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
module Rubasteme
  module AST
    # Common base of the AST nodes that carry a single literal and hold
    # no child nodes.
    class LeafNode < Node
      # The literal given at construction time.
      attr_reader :literal

      # literal:: value kept by this node; it is also passed unchanged
      #           to the superclass initializer.
      def initialize(literal)
        super
        @literal = literal
      end

      # Returns a two-element Array: the value of +type+ followed by the
      # literal.  (+type+ is not defined in this class; presumably it
      # is provided by Node.)
      def to_a
        [type, @literal]
      end
    end

    # Leaf node whose literal is always "()"; any argument given to
    # the constructor is ignored.
    class EmptyListNode < LeafNode
      def initialize(_ = nil)
        super("()")
      end
    end

    # The remaining leaf nodes differ only by class; they use the
    # one-argument constructor inherited from LeafNode.

    class BooleanNode < LeafNode; end

    class IdentifierNode < LeafNode; end

    class CharacterNode < LeafNode; end

    class StringNode < LeafNode; end

    class NumberNode < LeafNode; end

    class DotNode < LeafNode; end
  end # end of AST
end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Rubasteme
  # Base class of the errors defined by Rubasteme.
  class Error < StandardError; end

  # :stopdoc:
  # Message templates used by the error classes below.  Each key must
  # match the symbol looked up by the corresponding initializer.
  EMSG = {
    scheme_syntax_error: "syntax error: got=%s",
    unexpected_token_type: "unexpected token type: got=%s, expected=%s",
    missing_right_parenthesis: "missing right parenthesis",
    unsupported_feature: "unsupported feature: %s",
    not_implemented_yet: "not implemented yet: %s",
  }.freeze
  # :startdoc:

  # Indicates a syntax error as a Scheme program.
  class SchemeSyntaxErrorError < Error
    # literal:: the offending text; embedded in the message.
    def initialize(literal)
      super(EMSG[:scheme_syntax_error] % literal)
    end
  end

  # Indicates that a token is not the expected one.
  class UnexpectedTokenTypeError < Error
    # got::      the token type actually found.
    # expected:: the token type that was required (optional).
    def initialize(got, expected = nil)
      super(EMSG[:unexpected_token_type] % [got, expected])
    end
  end

  # Indicates a mismatch of parentheses.
  class MissingRightParenthesisError < Error
    def initialize
      super(EMSG[:missing_right_parenthesis])
    end
  end

  # Indicates a feature is not supported in Rubasteme.
  class UnsupportedFeatureError < Error
    # feature:: name of the unsupported feature; embedded in the message.
    def initialize(feature)
      super(EMSG[:unsupported_feature] % feature)
    end
  end

  # Indicates a feature is not implemented in the current release.
  class NotImplementedYetError < Error
    # feature:: name of the missing feature; embedded in the message.
    def initialize(feature)
      super(EMSG[:not_implemented_yet] % feature)
    end
  end
end
@@ -0,0 +1,608 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubasteme
4
+
5
+ def self.parser(lexer)
6
+ Parser.new(lexer)
7
+ end
8
+
9
+ class Parser
10
+
11
+ def initialize(lexer)
12
+ @lexer = lexer
13
+ end
14
+
15
+ def parse
16
+ return [] if @lexer.nil?
17
+ parse_program
18
+ end
19
+
20
+ def parse_program
21
+ ast_program = AST.instantiate(:ast_program, nil)
22
+ Kernel.loop {
23
+ ast_program << parse_expression
24
+ }
25
+ ast_program
26
+ end
27
+
28
+ def parse_expression
29
+ if start_delimiter?(@lexer.peek_token)
30
+ parse_compound_expression
31
+ else
32
+ parse_simple_expression
33
+ end
34
+ end
35
+
36
+ def parse_simple_expression
37
+ type, literal = *@lexer.next_token
38
+ AST.instantiate(ast_simple_type(type), literal)
39
+ end
40
+
41
+ def parse_identifier
42
+ parse_simple_expression
43
+ end
44
+
45
+ TOKEN_START_DELIMITERS = [ # :nodoc:
46
+ :lparen, # list: ( ... )
47
+ :vec_lparen, # vector: #( ... )
48
+ :bytevec_lparen, # bytevector: #u8( ... )
49
+ :quotation, # quotation: '<something>
50
+ :backquote, # quasiquote: `<something>
51
+ :comma, # used in quasiquote
52
+ :comma_at, # used in quasiquote
53
+ :comment_lparen, # comment start
54
+ ]
55
+
56
+ def start_delimiter?(token)
57
+ TOKEN_START_DELIMITERS.include?(token.type)
58
+ end
59
+
60
+ def parse_compound_expression
61
+ case @lexer.peek_token.type
62
+ when :vec_lparen
63
+ parse_vector
64
+ when :quotation
65
+ parse_quotation
66
+ when :lparen
67
+ parse_list_expression
68
+ else
69
+ raise SchemeSyntaxError, @lexer.peek_token.literal
70
+ end
71
+ end
72
+
73
+ def parse_vector
74
+ parse_data_to_matched_rparen
75
+ end
76
+
77
+ def parse_data_to_matched_rparen
78
+ token = @lexer.next_token
79
+ node = AST.instantiate(ast_compound_type(token.type), nil)
80
+ Kernel.loop {
81
+ break if @lexer.peek_token.type == :rparen
82
+ node << parse_datum
83
+ }
84
+ skip_rparen
85
+
86
+ node
87
+ end
88
+
89
+ def parse_datum
90
+ if start_delimiter?(@lexer.peek_token)
91
+ parse_compound_datum
92
+ else
93
+ parse_simple_datum
94
+ end
95
+ end
96
+
97
+ def parse_simple_datum
98
+ parse_simple_expression
99
+ end
100
+
101
+ def parse_compound_datum
102
+ case @lexer.peek_token.type
103
+ when :lparen
104
+ parse_data_list
105
+ when :vec_lparen
106
+ parse_vector
107
+ else
108
+ parse_simple_expression
109
+ end
110
+ end
111
+
112
+ def parse_data_list
113
+ parse_data_to_matched_rparen
114
+ end
115
+
116
+ def parse_quotation
117
+ literal = @lexer.next_token.literal
118
+ quote_node = AST.instantiate(:ast_quotation, literal)
119
+ quote_node << parse_datum
120
+ quote_node
121
+ end
122
+
123
+ DERIVED_IDENTIFIERS = [
124
+ "cond", "case", "and", "or", "when", "unless",
125
+ "let", "let*", "letrec", "letrec*",
126
+ "let-values", "let*-values",
127
+ "begin", "do",
128
+ "delay", "delay-force",
129
+ "parameterize",
130
+ "guard",
131
+ "case-lambda",
132
+ ]
133
+
134
+ def parse_list_expression
135
+ node = nil
136
+ skip_lparen
137
+ type, literal = *@lexer.peek_token
138
+ case type
139
+ when :rparen
140
+ # an empty list
141
+ node = AST.instantiate(:ast_empty_list, nil)
142
+ skip_rparen
143
+ when :identifier
144
+ case literal
145
+ when "lambda"
146
+ node = parse_lambda_expression
147
+ when "if"
148
+ node = parse_conditional
149
+ when "set!"
150
+ node = parse_assignment
151
+ when "let-syntax", "letrec-syntax"
152
+ node = parse_macro_block
153
+ when "define", "define-syntax", "define-values", "define-record-type"
154
+ node = parse_definition
155
+ when "include", "include-ci"
156
+ node = parse_includer
157
+ when *DERIVED_IDENTIFIERS
158
+ node = parse_derived_expression
159
+ end
160
+ end
161
+ node || parse_procedure_call
162
+ end
163
+
164
+ def parse_procedure_call
165
+ proc_call_node = AST.instantiate(:ast_procedure_call, nil)
166
+ proc_call_node.operator = parse_operator
167
+ Kernel.loop {
168
+ break if @lexer.peek_token.type == :rparen
169
+ proc_call_node.add_operand(parse_operand)
170
+ }
171
+ skip_rparen
172
+ proc_call_node
173
+ end
174
+
175
+ def parse_operator
176
+ parse_expression
177
+ end
178
+
179
+ def parse_operand
180
+ parse_expression
181
+ end
182
+
183
+ def parse_lambda_expression
184
+ @lexer.skip_token # skip :lambda
185
+ lambda_node = make_lambda_expression_node(parse_formals, read_body)
186
+ skip_rparen
187
+ lambda_node
188
+ end
189
+
190
+ def make_lambda_expression_node(formals, body)
191
+ lambda_node = AST.instantiate(:ast_lambda_expression, nil)
192
+ lambda_node.formals = formals
193
+ lambda_node.body = body
194
+ lambda_node
195
+ end
196
+
197
+ def parse_formals
198
+ formals_node = nil
199
+ # type 1: <identifier>
200
+ # type 2: ( <identifier 1> <identifier 2> ... )
201
+ # type 3: ( <identifier 1> <identifier 2> <dot> <identifier n> )
202
+ # => not supported yet
203
+ if @lexer.peek_token.type == :lparen
204
+ formals_node = AST.instantiate(:ast_formals, nil)
205
+ skip_lparen
206
+ Kernel.loop {
207
+ break if @lexer.peek_token.type == :rparen
208
+ formals_node.add_identifier(parse_identifier)
209
+ }
210
+ else # type 1
211
+ formals_node = parse_identifier
212
+ end
213
+ skip_rparen
214
+ formals_node
215
+ end
216
+
217
+ def read_body
218
+ body = []
219
+ Kernel.loop {
220
+ break if @lexer.peek_token.type == :rparen # the end of lambda exp.
221
+ body << parse_expression
222
+ }
223
+ body
224
+ end
225
+
226
+ def parse_conditional
227
+ if_node = AST.instantiate(:ast_conditional, @lexer.next_token.literal)
228
+ if_node.test = parse_test
229
+ if_node.consequent = parse_consequent
230
+ if @lexer.peek_token.type != :rparen
231
+ if_node.alternate = parse_alternate
232
+ end
233
+ skip_rparen
234
+ if_node
235
+ end
236
+
237
+ def parse_test
238
+ parse_expression
239
+ end
240
+
241
+ def parse_consequent
242
+ parse_expression
243
+ end
244
+
245
+ def parse_alternate
246
+ parse_expression
247
+ end
248
+
249
+ def parse_assignment
250
+ assignment_node = AST.instantiate(:ast_assignment, @lexer.next_token.literal)
251
+ assignment_node.identifier = parse_identifier
252
+ assignment_node.expression = parse_expression
253
+ skip_rparen
254
+ assignment_node
255
+ end
256
+
257
+ def parse_macro_block
258
+ not_implemented_yet("MACRO block")
259
+ end
260
+
261
+ def parse_definition
262
+ case @lexer.peek_token.literal
263
+ when "define"
264
+ parse_identifier_definition
265
+ when "define-syntax"
266
+ parse_define_syntax
267
+ when "define-values"
268
+ parse_define_values
269
+ when "define-record-type"
270
+ parse_define_record_type
271
+ else
272
+ raise SchemeSyntaxErrorError, @lexer.peek_token.literal
273
+ end
274
+ end
275
+
276
+ def parse_identifier_definition
277
+ # type 1: (define foo 3)
278
+ # type 2: (define bar (lambda (x y) (+ x y)))
279
+ # type 3: (define (hoge n m) (display n) (display m) (* n m))
280
+ define_node = AST.instantiate(:ast_identifier_definition, @lexer.next_token.literal)
281
+
282
+ case @lexer.peek_token.type
283
+ when :identifier
284
+ # type 1 and type 2
285
+ define_node.identifier = parse_identifier
286
+ define_node.expression = parse_expression
287
+ skip_rparen
288
+ when :lparen
289
+ # type 3:
290
+ # make a lambda expression, then handle as type 2
291
+ skip_lparen
292
+ define_node.identifier = parse_identifier
293
+ def_formals_node = AST.instantiate(:ast_formals, nil)
294
+ Kernel.loop {
295
+ break if @lexer.peek_token.type == :rparen
296
+ def_formals_node.add_identifier(parse_identifier)
297
+ }
298
+ skip_rparen
299
+
300
+ lambda_node = make_lambda_expression_node(def_formals_node, read_body)
301
+ skip_rparen
302
+
303
+ define_node.expression = lambda_node
304
+ else
305
+ raise SchemeSyntaxErrorError, @lexer.peek_token.literal
306
+ end
307
+
308
+ define_node
309
+ end
310
+
311
+ def parse_define_syntax
312
+ not_implemented_yet("DEFINE-SYNTAX")
313
+ end
314
+
315
+ def parse_define_values
316
+ not_implemented_yet("DEFINE-VALUES")
317
+ end
318
+
319
+ def parse_define_record_type
320
+ not_implemented_yet("DEFINE-RECORD-TYPE")
321
+ end
322
+
323
+ def parse_includer
324
+ not_implemented_yet("INCLUDE or INCLUDE-CI")
325
+ end
326
+
327
+ def parse_derived_expression
328
+ literal = @lexer.next_token.literal
329
+ name = compose_method_name("parse_", literal).intern
330
+ if self.respond_to?(name)
331
+ m = self.method(name)
332
+ m.call
333
+ else
334
+ not_implemented_yet(literal)
335
+ end
336
+ end
337
+
338
+ def parse_cond
339
+ cond_node = AST.instantiate(:ast_cond, nil)
340
+ Kernel.loop {
341
+ break if @lexer.peek_token.type == :rparen
342
+ cond_node.add_clause(parse_cond_clause)
343
+ }
344
+ skip_rparen
345
+ cond_node
346
+ end
347
+
348
+ def parse_cond_clause
349
+ skip_lparen
350
+ clause_node = AST.instantiate(:ast_cond_clause, nil)
351
+ # type 1: ( <test> )
352
+ # type 2: ( <test> => <expression> )
353
+ # type 3: ( <test> <sequence> )
354
+ # type 4: ( else <sequence> )
355
+ clause_node.test = parse_test
356
+ Kernel.loop {
357
+ break if @lexer.peek_token.type == :rparen
358
+ clause_node.add_expression(parse_expression)
359
+ }
360
+ skip_rparen
361
+ clause_node
362
+ end
363
+
364
+ def parse_case
365
+ not_implemented_yet("CASE")
366
+ end
367
+
368
+ def parse_and
369
+ parse_logical_test("and")
370
+ end
371
+
372
+ def parse_or
373
+ parse_logical_test("or")
374
+ end
375
+
376
+ def parse_logical_test(type)
377
+ ast_type = "ast_#{type}".intern
378
+ node = AST.instantiate(ast_type, nil)
379
+ Kernel.loop {
380
+ break if @lexer.peek_token.type == :rparen
381
+ node << parse_test
382
+ }
383
+ skip_rparen
384
+ node
385
+ end
386
+
387
+ def parse_when
388
+ parse_test_and_sequence("when")
389
+ end
390
+
391
+ def parse_unless
392
+ parse_test_and_sequence("unless")
393
+ end
394
+
395
+ def parse_test_and_sequence(type)
396
+ ast_type = "ast_#{type}".intern
397
+ node = AST.instantiate(ast_type, nil)
398
+ node.test = parse_test
399
+ Kernel.loop {
400
+ break if @lexer.peek_token.type == :rparen
401
+ node << parse_expression
402
+ }
403
+ skip_rparen
404
+ node
405
+ end
406
+
407
+ def parse_let
408
+ let_node = AST.instantiate(:ast_let, nil)
409
+ if @lexer.peek_token.type == :identifier
410
+ let_node.identifier = parse_identifier
411
+ end
412
+ let_node.bindings = parse_bindings
413
+ let_node.body = read_body
414
+ skip_rparen
415
+ let_node
416
+ end
417
+
418
+ def parse_bindings
419
+ bindings_node = AST.instantiate(:ast_bindings, nil)
420
+ skip_lparen
421
+ Kernel.loop {
422
+ break if @lexer.peek_token.type == :rparen
423
+ bindings_node.add_bind_spec(parse_bind_spec)
424
+ }
425
+ skip_rparen
426
+ bindings_node
427
+ end
428
+
429
+ def parse_bind_spec
430
+ spec_node = AST.instantiate(:ast_bind_spec, nil)
431
+ skip_lparen
432
+ spec_node.identifier = parse_identifier
433
+ spec_node.expression = parse_expression
434
+ skip_rparen
435
+ spec_node
436
+ end
437
+
438
+ def parse_let_star
439
+ parse_let_base("let_star")
440
+ end
441
+
442
+ def parse_letrec
443
+ parse_let_base("letrec")
444
+ end
445
+
446
+ def parse_letrec_star
447
+ parse_let_base("letrec_star")
448
+ end
449
+
450
+ def parse_let_base(type)
451
+ ast_type = "ast_#{type}".intern
452
+ node = AST.instantiate(ast_type, nil)
453
+ node.bindings = parse_bindings
454
+ node.body = read_body
455
+ skip_rparen
456
+ node
457
+ end
458
+
459
+ def parse_let_values
460
+ not_implemented_yet("LET-VALUES")
461
+ end
462
+
463
+ def parse_let_star_values
464
+ not_implemented_yet("LET*-VALUES")
465
+ end
466
+
467
+ def parse_begin
468
+ begin_node = AST.instantiate(:ast_begin, nil)
469
+ Kernel.loop {
470
+ break if @lexer.peek_token.type == :rparen
471
+ begin_node << parse_expression
472
+ }
473
+ skip_rparen
474
+ begin_node
475
+ end
476
+
477
+ def parse_do
478
+ do_node = AST.instantiate(:ast_do, nil)
479
+ do_node.iteration_bindings = parse_iteration_bindings
480
+ do_node.test_and_do_result = parse_test_and_do_result
481
+ Kernel.loop {
482
+ break if @lexer.peek_token.type == :rparen
483
+ do_node.add_command(parse_expression)
484
+ }
485
+ skip_rparen
486
+ do_node
487
+ end
488
+
489
+ def parse_iteration_bindings
490
+ node = AST.instantiate(:ast_iteration_bindings, nil)
491
+ skip_lparen
492
+ Kernel.loop {
493
+ break if @lexer.peek_token.type == :rparen
494
+ node.add_iteration_spec(parse_iteration_spec)
495
+ }
496
+ skip_rparen
497
+ node
498
+ end
499
+
500
+ def parse_iteration_spec
501
+ spec_node = AST.instantiate(:ast_iteration_spec, nil)
502
+ skip_lparen
503
+ spec_node.identifier = parse_identifier
504
+ spec_node.init = parse_init
505
+ if @lexer.peek_token.type != :rparen
506
+ spec_node.step = parse_step
507
+ end
508
+ skip_rparen
509
+ spec_node
510
+ end
511
+
512
+ def parse_init
513
+ parse_expression
514
+ end
515
+
516
+ def parse_step
517
+ parse_expression
518
+ end
519
+
520
+ def parse_test_and_do_result
521
+ node = AST.instantiate(:ast_test_and_do_result, nil)
522
+ skip_lparen
523
+ node << parse_test
524
+ Kernel.loop {
525
+ break if @lexer.peek_token.type == :rparen
526
+ node.add_expression(parse_expression)
527
+ }
528
+ skip_rparen
529
+ node
530
+ end
531
+
532
+ def parse_delay
533
+ not_implemented_yet("DELAY")
534
+ end
535
+
536
+ def parse_delay_force
537
+ not_implemented_yet("DELAY-FORCE")
538
+ end
539
+
540
+ def parse_parameterize
541
+ not_implemented_yet("PARAMETERIZE")
542
+ end
543
+
544
+ def parse_guard
545
+ not_implemented_yet("GUARD")
546
+ end
547
+
548
+ def parse_case_lambda
549
+ not_implemented_yet("CASE-LAMBDA")
550
+ end
551
+
552
+ private
553
+
554
+ def skip_lparen
555
+ if @lexer.peek_token.type == :lparen
556
+ @lexer.skip_token
557
+ else
558
+ raise UnexpectedTokenTypeError.new(@lexer.peek_token.type, :lparen)
559
+ end
560
+ end
561
+
562
+ def skip_rparen
563
+ if @lexer.peek_token.type == :rparen
564
+ @lexer.skip_token
565
+ else
566
+ raise MissingRightParenthesisError
567
+ end
568
+ end
569
+
570
+ def ast_simple_type(token_type)
571
+ case token_type
572
+ when :identifier
573
+ :ast_identifier
574
+ when :boolean, :character, :number, :string
575
+ "ast_#{token_type}".intern
576
+ when :dot
577
+ :ast_dot
578
+ else
579
+ :ast_identifier
580
+ end
581
+ end
582
+
583
+ def ast_compound_type(token_type)
584
+ case token_type
585
+ when :vec_lparen
586
+ :ast_vector
587
+ else
588
+ :ast_list
589
+ end
590
+ end
591
+
592
+ SCM_CHAR_TO_RB_MAP = {
593
+ "*" => "_star",
594
+ "-" => "_",
595
+ }
596
+
597
+ def compose_method_name(prefix, type_name)
598
+ converted_name = type_name.gsub(/[*\-]/, SCM_CHAR_TO_RB_MAP)
599
+ prefix + converted_name
600
+ end
601
+
602
+ def not_implemented_yet(feature)
603
+ raise NotImplementedYetError, feature
604
+ end
605
+
606
+ end # end of Parser class
607
+
608
+ end