rubasteme 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubasteme
4
+
5
+ module AST
6
# Terminal AST node that carries exactly one literal value.
class LeafNode < Node
  # The literal handed to the constructor.
  attr_reader :literal

  # literal:: value stored in this node; it is also forwarded to the
  #           superclass initializer.
  def initialize(literal)
    super(literal)
    @literal = literal
  end

  # Returns a two-element array: the node's +type+ (presumably provided
  # by Node -- it is not defined in this class) followed by the literal.
  def to_a
    [type, @literal]
  end
end
18
+
19
# Leaf node whose literal is always the string "()".
class EmptyListNode < LeafNode
  # Accepts one optional argument and ignores it, so the node can be
  # built with the same call shape as the other leaf nodes.
  def initialize(_ignored = nil)
    super("()")
  end
end
24
+
25
# Leaf node type for boolean literals (judging by its name); it adds
# no behaviour of its own on top of LeafNode.
class BooleanNode < LeafNode
  # literal:: passed unchanged to LeafNode#initialize.
  def initialize(literal)
    super(literal)
  end
end
30
+
31
# Leaf node type for identifiers (judging by its name); it adds no
# behaviour of its own on top of LeafNode.
class IdentifierNode < LeafNode
  # literal:: passed unchanged to LeafNode#initialize.
  def initialize(literal)
    super(literal)
  end
end
36
+
37
# Leaf node type for character literals (judging by its name); it adds
# no behaviour of its own on top of LeafNode.
class CharacterNode < LeafNode
  # literal:: passed unchanged to LeafNode#initialize.
  def initialize(literal)
    super(literal)
  end
end
42
+
43
# Leaf node type for string literals (judging by its name); it adds no
# behaviour of its own on top of LeafNode.
class StringNode < LeafNode
  # literal:: passed unchanged to LeafNode#initialize.
  def initialize(literal)
    super(literal)
  end
end
48
+
49
# Leaf node type for numeric literals (judging by its name); it adds
# no behaviour of its own on top of LeafNode.
class NumberNode < LeafNode
  # literal:: passed unchanged to LeafNode#initialize.
  def initialize(literal)
    super(literal)
  end
end
54
+
55
# Leaf node type for the dot token (judging by its name); it adds no
# behaviour of its own on top of LeafNode.
class DotNode < LeafNode
  # literal:: passed unchanged to LeafNode#initialize.
  def initialize(literal)
    super(literal)
  end
end
60
+
61
+ end # end of AST
62
+ end
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
module Rubasteme
  # Base class of every error defined in this module.
  class Error < StandardError; end

  # :stopdoc:
  # Message templates (Kernel#format style), keyed by error kind.
  # The key :scheme_syntax_error used to be misspelled, which made the
  # lookup in SchemeSyntaxErrorError return nil and crash with
  # NoMethodError instead of producing the intended message.
  EMSG = {
    scheme_syntax_error: "syntax error: got=%s",
    unexpected_token_type: "unexpected token type: got=%s, expected=%s",
    missing_right_parenthesis: "missing right parenthesis",
    unsupported_feature: "unsupported feature: %s",
    not_implemented_yet: "not implemented yet: %s",
  }.freeze
  # :startdoc:

  # Indicates a syntax error as Scheme program.
  class SchemeSyntaxErrorError < Error
    # literal:: the offending text, embedded in the message.
    def initialize(literal)
      # Kernel#format (unlike String#%) does not splat an Array argument
      # into the template.
      super(format(EMSG[:scheme_syntax_error], literal))
    end
  end

  # Indicates a token is not the expected one.
  class UnexpectedTokenTypeError < Error
    # got::      the token type that was found.
    # expected:: the token type that was required; renders as an empty
    #            string when omitted.
    def initialize(got, expected = nil)
      super(format(EMSG[:unexpected_token_type], got, expected))
    end
  end

  # Indicates a mismatch of parentheses.
  class MissingRightParenthesisError < Error
    def initialize
      super(EMSG[:missing_right_parenthesis])
    end
  end

  # Indicates a feature is not supported in Rubasteme.
  class UnsupportedFeatureError < Error
    # feature:: name of the unsupported feature, embedded in the message.
    def initialize(feature)
      super(format(EMSG[:unsupported_feature], feature))
    end
  end

  # Indicates a feature is not implemented in the current release.
  class NotImplementedYetError < Error
    # feature:: name of the missing feature, embedded in the message.
    def initialize(feature)
      super(format(EMSG[:not_implemented_yet], feature))
    end
  end
end
@@ -0,0 +1,608 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rubasteme
4
+
5
# Convenience factory: returns a new Parser that reads from +lexer+.
def self.parser(lexer)
  Parser.new(lexer)
end
8
+
9
+ class Parser
10
+
11
+ def initialize(lexer)
12
+ @lexer = lexer
13
+ end
14
+
15
+ def parse
16
+ return [] if @lexer.nil?
17
+ parse_program
18
+ end
19
+
20
+ def parse_program
21
+ ast_program = AST.instantiate(:ast_program, nil)
22
+ Kernel.loop {
23
+ ast_program << parse_expression
24
+ }
25
+ ast_program
26
+ end
27
+
28
+ def parse_expression
29
+ if start_delimiter?(@lexer.peek_token)
30
+ parse_compound_expression
31
+ else
32
+ parse_simple_expression
33
+ end
34
+ end
35
+
36
+ def parse_simple_expression
37
+ type, literal = *@lexer.next_token
38
+ AST.instantiate(ast_simple_type(type), literal)
39
+ end
40
+
41
+ def parse_identifier
42
+ parse_simple_expression
43
+ end
44
+
45
+ TOKEN_START_DELIMITERS = [ # :nodoc:
46
+ :lparen, # list: ( ... )
47
+ :vec_lparen, # vector: #( ... )
48
+ :bytevec_lparen, # bytevector: #u8( ... )
49
+ :quotation, # quotation: '<something>
50
+ :backquote, # quasiquote: `<something>
51
+ :comma, # used in quasiquote
52
+ :comma_at, # used in quasiquote
53
+ :comment_lparen, # comment start
54
+ ]
55
+
56
+ def start_delimiter?(token)
57
+ TOKEN_START_DELIMITERS.include?(token.type)
58
+ end
59
+
60
+ def parse_compound_expression
61
+ case @lexer.peek_token.type
62
+ when :vec_lparen
63
+ parse_vector
64
+ when :quotation
65
+ parse_quotation
66
+ when :lparen
67
+ parse_list_expression
68
+ else
69
+ raise SchemeSyntaxError, @lexer.peek_token.literal
70
+ end
71
+ end
72
+
73
+ def parse_vector
74
+ parse_data_to_matched_rparen
75
+ end
76
+
77
+ def parse_data_to_matched_rparen
78
+ token = @lexer.next_token
79
+ node = AST.instantiate(ast_compound_type(token.type), nil)
80
+ Kernel.loop {
81
+ break if @lexer.peek_token.type == :rparen
82
+ node << parse_datum
83
+ }
84
+ skip_rparen
85
+
86
+ node
87
+ end
88
+
89
+ def parse_datum
90
+ if start_delimiter?(@lexer.peek_token)
91
+ parse_compound_datum
92
+ else
93
+ parse_simple_datum
94
+ end
95
+ end
96
+
97
+ def parse_simple_datum
98
+ parse_simple_expression
99
+ end
100
+
101
+ def parse_compound_datum
102
+ case @lexer.peek_token.type
103
+ when :lparen
104
+ parse_data_list
105
+ when :vec_lparen
106
+ parse_vector
107
+ else
108
+ parse_simple_expression
109
+ end
110
+ end
111
+
112
+ def parse_data_list
113
+ parse_data_to_matched_rparen
114
+ end
115
+
116
+ def parse_quotation
117
+ literal = @lexer.next_token.literal
118
+ quote_node = AST.instantiate(:ast_quotation, literal)
119
+ quote_node << parse_datum
120
+ quote_node
121
+ end
122
+
123
+ DERIVED_IDENTIFIERS = [
124
+ "cond", "case", "and", "or", "when", "unless",
125
+ "let", "let*", "letrec", "letrec*",
126
+ "let-values", "let*-values",
127
+ "begin", "do",
128
+ "delay", "delay-force",
129
+ "parameterize",
130
+ "guard",
131
+ "case-lambda",
132
+ ]
133
+
134
+ def parse_list_expression
135
+ node = nil
136
+ skip_lparen
137
+ type, literal = *@lexer.peek_token
138
+ case type
139
+ when :rparen
140
+ # an empty list
141
+ node = AST.instantiate(:ast_empty_list, nil)
142
+ skip_rparen
143
+ when :identifier
144
+ case literal
145
+ when "lambda"
146
+ node = parse_lambda_expression
147
+ when "if"
148
+ node = parse_conditional
149
+ when "set!"
150
+ node = parse_assignment
151
+ when "let-syntax", "letrec-syntax"
152
+ node = parse_macro_block
153
+ when "define", "define-syntax", "define-values", "define-record-type"
154
+ node = parse_definition
155
+ when "include", "include-ci"
156
+ node = parse_includer
157
+ when *DERIVED_IDENTIFIERS
158
+ node = parse_derived_expression
159
+ end
160
+ end
161
+ node || parse_procedure_call
162
+ end
163
+
164
+ def parse_procedure_call
165
+ proc_call_node = AST.instantiate(:ast_procedure_call, nil)
166
+ proc_call_node.operator = parse_operator
167
+ Kernel.loop {
168
+ break if @lexer.peek_token.type == :rparen
169
+ proc_call_node.add_operand(parse_operand)
170
+ }
171
+ skip_rparen
172
+ proc_call_node
173
+ end
174
+
175
+ def parse_operator
176
+ parse_expression
177
+ end
178
+
179
+ def parse_operand
180
+ parse_expression
181
+ end
182
+
183
+ def parse_lambda_expression
184
+ @lexer.skip_token # skip :lambda
185
+ lambda_node = make_lambda_expression_node(parse_formals, read_body)
186
+ skip_rparen
187
+ lambda_node
188
+ end
189
+
190
+ def make_lambda_expression_node(formals, body)
191
+ lambda_node = AST.instantiate(:ast_lambda_expression, nil)
192
+ lambda_node.formals = formals
193
+ lambda_node.body = body
194
+ lambda_node
195
+ end
196
+
197
+ def parse_formals
198
+ formals_node = nil
199
+ # type 1: <identifier>
200
+ # type 2: ( <identifier 1> <identifier 2> ... )
201
+ # type 3: ( <identifier 1> <identifier 2> <dot> <identifier n> )
202
+ # => not supported yet
203
+ if @lexer.peek_token.type == :lparen
204
+ formals_node = AST.instantiate(:ast_formals, nil)
205
+ skip_lparen
206
+ Kernel.loop {
207
+ break if @lexer.peek_token.type == :rparen
208
+ formals_node.add_identifier(parse_identifier)
209
+ }
210
+ else # type 1
211
+ formals_node = parse_identifier
212
+ end
213
+ skip_rparen
214
+ formals_node
215
+ end
216
+
217
+ def read_body
218
+ body = []
219
+ Kernel.loop {
220
+ break if @lexer.peek_token.type == :rparen # the end of lambda exp.
221
+ body << parse_expression
222
+ }
223
+ body
224
+ end
225
+
226
+ def parse_conditional
227
+ if_node = AST.instantiate(:ast_conditional, @lexer.next_token.literal)
228
+ if_node.test = parse_test
229
+ if_node.consequent = parse_consequent
230
+ if @lexer.peek_token.type != :rparen
231
+ if_node.alternate = parse_alternate
232
+ end
233
+ skip_rparen
234
+ if_node
235
+ end
236
+
237
+ def parse_test
238
+ parse_expression
239
+ end
240
+
241
+ def parse_consequent
242
+ parse_expression
243
+ end
244
+
245
+ def parse_alternate
246
+ parse_expression
247
+ end
248
+
249
+ def parse_assignment
250
+ assignment_node = AST.instantiate(:ast_assignment, @lexer.next_token.literal)
251
+ assignment_node.identifier = parse_identifier
252
+ assignment_node.expression = parse_expression
253
+ skip_rparen
254
+ assignment_node
255
+ end
256
+
257
+ def parse_macro_block
258
+ not_implemented_yet("MACRO block")
259
+ end
260
+
261
+ def parse_definition
262
+ case @lexer.peek_token.literal
263
+ when "define"
264
+ parse_identifier_definition
265
+ when "define-syntax"
266
+ parse_define_syntax
267
+ when "define-values"
268
+ parse_define_values
269
+ when "define-record-type"
270
+ parse_define_record_type
271
+ else
272
+ raise SchemeSyntaxErrorError, @lexer.peek_token.literal
273
+ end
274
+ end
275
+
276
+ def parse_identifier_definition
277
+ # type 1: (define foo 3)
278
+ # type 2: (define bar (lambda (x y) (+ x y)))
279
+ # type 3: (define (hoge n m) (display n) (display m) (* n m))
280
+ define_node = AST.instantiate(:ast_identifier_definition, @lexer.next_token.literal)
281
+
282
+ case @lexer.peek_token.type
283
+ when :identifier
284
+ # type 1 and type 2
285
+ define_node.identifier = parse_identifier
286
+ define_node.expression = parse_expression
287
+ skip_rparen
288
+ when :lparen
289
+ # type 3:
290
+ # make a lambda expression, then handle as type 2
291
+ skip_lparen
292
+ define_node.identifier = parse_identifier
293
+ def_formals_node = AST.instantiate(:ast_formals, nil)
294
+ Kernel.loop {
295
+ break if @lexer.peek_token.type == :rparen
296
+ def_formals_node.add_identifier(parse_identifier)
297
+ }
298
+ skip_rparen
299
+
300
+ lambda_node = make_lambda_expression_node(def_formals_node, read_body)
301
+ skip_rparen
302
+
303
+ define_node.expression = lambda_node
304
+ else
305
+ raise SchemeSyntaxErrorError, @lexer.peek_token.literal
306
+ end
307
+
308
+ define_node
309
+ end
310
+
311
+ def parse_define_syntax
312
+ not_implemented_yet("DEFINE-SYNTAX")
313
+ end
314
+
315
+ def parse_define_values
316
+ not_implemented_yet("DEFINE-VALUES")
317
+ end
318
+
319
+ def parse_define_record_type
320
+ not_implemented_yet("DEFINE-RECORD-TYPE")
321
+ end
322
+
323
+ def parse_includer
324
+ not_implemented_yet("INCLUDE or INCLUDE-CI")
325
+ end
326
+
327
+ def parse_derived_expression
328
+ literal = @lexer.next_token.literal
329
+ name = compose_method_name("parse_", literal).intern
330
+ if self.respond_to?(name)
331
+ m = self.method(name)
332
+ m.call
333
+ else
334
+ not_implemented_yet(literal)
335
+ end
336
+ end
337
+
338
+ def parse_cond
339
+ cond_node = AST.instantiate(:ast_cond, nil)
340
+ Kernel.loop {
341
+ break if @lexer.peek_token.type == :rparen
342
+ cond_node.add_clause(parse_cond_clause)
343
+ }
344
+ skip_rparen
345
+ cond_node
346
+ end
347
+
348
+ def parse_cond_clause
349
+ skip_lparen
350
+ clause_node = AST.instantiate(:ast_cond_clause, nil)
351
+ # type 1: ( <test> )
352
+ # type 2: ( <test> => <expression> )
353
+ # type 3: ( <test> <sequence> )
354
+ # type 4: ( else <sequence> )
355
+ clause_node.test = parse_test
356
+ Kernel.loop {
357
+ break if @lexer.peek_token.type == :rparen
358
+ clause_node.add_expression(parse_expression)
359
+ }
360
+ skip_rparen
361
+ clause_node
362
+ end
363
+
364
+ def parse_case
365
+ not_implemented_yet("CASE")
366
+ end
367
+
368
+ def parse_and
369
+ parse_logical_test("and")
370
+ end
371
+
372
+ def parse_or
373
+ parse_logical_test("or")
374
+ end
375
+
376
+ def parse_logical_test(type)
377
+ ast_type = "ast_#{type}".intern
378
+ node = AST.instantiate(ast_type, nil)
379
+ Kernel.loop {
380
+ break if @lexer.peek_token.type == :rparen
381
+ node << parse_test
382
+ }
383
+ skip_rparen
384
+ node
385
+ end
386
+
387
+ def parse_when
388
+ parse_test_and_sequence("when")
389
+ end
390
+
391
+ def parse_unless
392
+ parse_test_and_sequence("unless")
393
+ end
394
+
395
+ def parse_test_and_sequence(type)
396
+ ast_type = "ast_#{type}".intern
397
+ node = AST.instantiate(ast_type, nil)
398
+ node.test = parse_test
399
+ Kernel.loop {
400
+ break if @lexer.peek_token.type == :rparen
401
+ node << parse_expression
402
+ }
403
+ skip_rparen
404
+ node
405
+ end
406
+
407
+ def parse_let
408
+ let_node = AST.instantiate(:ast_let, nil)
409
+ if @lexer.peek_token.type == :identifier
410
+ let_node.identifier = parse_identifier
411
+ end
412
+ let_node.bindings = parse_bindings
413
+ let_node.body = read_body
414
+ skip_rparen
415
+ let_node
416
+ end
417
+
418
+ def parse_bindings
419
+ bindings_node = AST.instantiate(:ast_bindings, nil)
420
+ skip_lparen
421
+ Kernel.loop {
422
+ break if @lexer.peek_token.type == :rparen
423
+ bindings_node.add_bind_spec(parse_bind_spec)
424
+ }
425
+ skip_rparen
426
+ bindings_node
427
+ end
428
+
429
+ def parse_bind_spec
430
+ spec_node = AST.instantiate(:ast_bind_spec, nil)
431
+ skip_lparen
432
+ spec_node.identifier = parse_identifier
433
+ spec_node.expression = parse_expression
434
+ skip_rparen
435
+ spec_node
436
+ end
437
+
438
+ def parse_let_star
439
+ parse_let_base("let_star")
440
+ end
441
+
442
+ def parse_letrec
443
+ parse_let_base("letrec")
444
+ end
445
+
446
+ def parse_letrec_star
447
+ parse_let_base("letrec_star")
448
+ end
449
+
450
+ def parse_let_base(type)
451
+ ast_type = "ast_#{type}".intern
452
+ node = AST.instantiate(ast_type, nil)
453
+ node.bindings = parse_bindings
454
+ node.body = read_body
455
+ skip_rparen
456
+ node
457
+ end
458
+
459
+ def parse_let_values
460
+ not_implemented_yet("LET-VALUES")
461
+ end
462
+
463
+ def parse_let_star_values
464
+ not_implemented_yet("LET*-VALUES")
465
+ end
466
+
467
+ def parse_begin
468
+ begin_node = AST.instantiate(:ast_begin, nil)
469
+ Kernel.loop {
470
+ break if @lexer.peek_token.type == :rparen
471
+ begin_node << parse_expression
472
+ }
473
+ skip_rparen
474
+ begin_node
475
+ end
476
+
477
+ def parse_do
478
+ do_node = AST.instantiate(:ast_do, nil)
479
+ do_node.iteration_bindings = parse_iteration_bindings
480
+ do_node.test_and_do_result = parse_test_and_do_result
481
+ Kernel.loop {
482
+ break if @lexer.peek_token.type == :rparen
483
+ do_node.add_command(parse_expression)
484
+ }
485
+ skip_rparen
486
+ do_node
487
+ end
488
+
489
+ def parse_iteration_bindings
490
+ node = AST.instantiate(:ast_iteration_bindings, nil)
491
+ skip_lparen
492
+ Kernel.loop {
493
+ break if @lexer.peek_token.type == :rparen
494
+ node.add_iteration_spec(parse_iteration_spec)
495
+ }
496
+ skip_rparen
497
+ node
498
+ end
499
+
500
+ def parse_iteration_spec
501
+ spec_node = AST.instantiate(:ast_iteration_spec, nil)
502
+ skip_lparen
503
+ spec_node.identifier = parse_identifier
504
+ spec_node.init = parse_init
505
+ if @lexer.peek_token.type != :rparen
506
+ spec_node.step = parse_step
507
+ end
508
+ skip_rparen
509
+ spec_node
510
+ end
511
+
512
+ def parse_init
513
+ parse_expression
514
+ end
515
+
516
+ def parse_step
517
+ parse_expression
518
+ end
519
+
520
+ def parse_test_and_do_result
521
+ node = AST.instantiate(:ast_test_and_do_result, nil)
522
+ skip_lparen
523
+ node << parse_test
524
+ Kernel.loop {
525
+ break if @lexer.peek_token.type == :rparen
526
+ node.add_expression(parse_expression)
527
+ }
528
+ skip_rparen
529
+ node
530
+ end
531
+
532
+ def parse_delay
533
+ not_implemented_yet("DELAY")
534
+ end
535
+
536
+ def parse_delay_force
537
+ not_implemented_yet("DELAY-FORCE")
538
+ end
539
+
540
+ def parse_parameterize
541
+ not_implemented_yet("PARAMETERIZE")
542
+ end
543
+
544
+ def parse_guard
545
+ not_implemented_yet("GUARD")
546
+ end
547
+
548
+ def parse_case_lambda
549
+ not_implemented_yet("CASE-LAMBDA")
550
+ end
551
+
552
+ private
553
+
554
+ def skip_lparen
555
+ if @lexer.peek_token.type == :lparen
556
+ @lexer.skip_token
557
+ else
558
+ raise UnexpectedTokenTypeError.new(@lexer.peek_token.type, :lparen)
559
+ end
560
+ end
561
+
562
+ def skip_rparen
563
+ if @lexer.peek_token.type == :rparen
564
+ @lexer.skip_token
565
+ else
566
+ raise MissingRightParenthesisError
567
+ end
568
+ end
569
+
570
+ def ast_simple_type(token_type)
571
+ case token_type
572
+ when :identifier
573
+ :ast_identifier
574
+ when :boolean, :character, :number, :string
575
+ "ast_#{token_type}".intern
576
+ when :dot
577
+ :ast_dot
578
+ else
579
+ :ast_identifier
580
+ end
581
+ end
582
+
583
+ def ast_compound_type(token_type)
584
+ case token_type
585
+ when :vec_lparen
586
+ :ast_vector
587
+ else
588
+ :ast_list
589
+ end
590
+ end
591
+
592
+ SCM_CHAR_TO_RB_MAP = {
593
+ "*" => "_star",
594
+ "-" => "_",
595
+ }
596
+
597
+ def compose_method_name(prefix, type_name)
598
+ converted_name = type_name.gsub(/[*\-]/, SCM_CHAR_TO_RB_MAP)
599
+ prefix + converted_name
600
+ end
601
+
602
+ def not_implemented_yet(feature)
603
+ raise NotImplementedYetError, feature
604
+ end
605
+
606
+ end # end of Parser class
607
+
608
+ end