rubasteme 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ module Rubasteme
5
5
 
6
6
  # :stopdoc:
7
7
  EMSG = {
8
- scheme_sytanx_error: "syntax error: got=%s",
8
+ scheme_syntax_error: "syntax error: %s",
9
9
  unexpected_token_type: "unexpected token type: got=%s, expected=%s",
10
10
  missing_right_parenthesis: "missing right parenthesis",
11
11
  unsupported_feature: "unsupported feature: %s",
@@ -15,8 +15,8 @@ module Rubasteme
15
15
 
16
16
  # Indicates a syntax error in a Scheme program.
17
17
  class SchemeSyntaxErrorError < Error
18
- def initialize(literal)
19
- super(EMSG[:scheme_syntax_error] % literal)
18
+ def initialize(msg)
19
+ super(EMSG[:scheme_syntax_error] % msg)
20
20
  end
21
21
  end
22
22
 
@@ -3,675 +3,54 @@
3
3
  module Rubasteme
4
4
 
5
5
  def self.parser
6
- Parser.new
6
+ Parser::Parser.new
7
7
  end
8
8
 
9
- class Parser
9
+ module Parser
10
10
 
11
11
  def self.version
12
- "(rubasteme.parser :version #{VERSION} :release #{RELEASE})"
12
+ Rubasteme.send(:make_version, self.name)
13
13
  end
14
14
 
15
- def initialize
16
- @lexer = nil
17
- end
18
-
19
- def parse(lexer)
20
- return [] if lexer.nil?
21
- @lexer = lexer
22
- parse_program
23
- end
24
-
25
- def parse_program
26
- ast_program = AST.instantiate(:ast_program, nil)
27
- Kernel.loop {
28
- ast_program << parse_expression
29
- }
30
- ast_program
31
- end
32
-
33
- def parse_expression
34
- if start_delimiter?(@lexer.peek_token)
35
- parse_compound_expression
36
- else
37
- parse_simple_expression
38
- end
39
- end
40
-
41
- def parse_simple_expression
42
- type, literal = *@lexer.next_token
43
- AST.instantiate(ast_simple_type(type), literal)
44
- end
45
-
46
- def parse_identifier
47
- parse_simple_expression
48
- end
49
-
50
- TOKEN_START_DELIMITERS = [ # :nodoc:
51
- :lparen, # list: ( ... )
52
- :vec_lparen, # vector: #( ... )
53
- :bytevec_lparen, # bytevector: #u8( ... )
54
- :quotation, # quotation: '<something>
55
- :backquote, # quasiquote: `<something>
56
- :comma, # used in quasiquote
57
- :comma_at, # used in quasiquote
58
- :comment_lparen, # comment start
59
- ]
60
-
61
- def start_delimiter?(token)
62
- TOKEN_START_DELIMITERS.include?(token.type)
63
- end
64
-
65
- def parse_compound_expression
66
- case @lexer.peek_token.type
67
- when :vec_lparen
68
- parse_vector
69
- when :quotation
70
- parse_quotation
71
- when :lparen
72
- parse_list_expression
73
- else
74
- raise SchemeSyntaxError, @lexer.peek_token.literal
75
- end
76
- end
77
-
78
- def parse_vector
79
- parse_data_to_matched_rparen
80
- end
81
-
82
- def parse_data_to_matched_rparen
83
- token = @lexer.next_token
84
- node = AST.instantiate(ast_compound_type(token.type), nil)
85
- Kernel.loop {
86
- break if @lexer.peek_token.type == :rparen
87
- node << parse_datum
88
- }
89
- skip_rparen
90
-
91
- node
92
- end
93
-
94
- def parse_datum
95
- if start_delimiter?(@lexer.peek_token)
96
- parse_compound_datum
97
- else
98
- parse_simple_datum
99
- end
100
- end
101
-
102
- def parse_simple_datum
103
- parse_simple_expression
104
- end
105
-
106
- def parse_compound_datum
107
- case @lexer.peek_token.type
108
- when :lparen
109
- parse_data_list
110
- when :vec_lparen
111
- parse_vector
112
- else
113
- parse_simple_expression
114
- end
115
- end
116
-
117
- def parse_data_list
118
- parse_data_to_matched_rparen
119
- end
120
-
121
- def parse_quotation
122
- literal = @lexer.next_token.literal
123
- quote_node = AST.instantiate(:ast_quotation, literal)
124
- quote_node << parse_datum
125
- quote_node
126
- end
127
-
128
- DERIVED_IDENTIFIERS = [
129
- "cond", "case", "and", "or", "when", "unless",
130
- "let", "let*", "letrec", "letrec*",
131
- "let-values", "let*-values",
132
- "begin", "do",
133
- "delay", "delay-force",
134
- "parameterize",
135
- "guard",
136
- "case-lambda",
137
- ]
138
-
139
- def parse_list_expression
140
- node = nil
141
- skip_lparen
142
- type, literal = *@lexer.peek_token
143
- case type
144
- when :rparen
145
- # an empty list
146
- node = AST.instantiate(:ast_empty_list, nil)
147
- skip_rparen
148
- when :identifier
149
- case literal
150
- when "lambda"
151
- node = parse_lambda_expression
152
- when "if"
153
- node = parse_conditional
154
- when "set!"
155
- node = parse_assignment
156
- when "let-syntax", "letrec-syntax"
157
- node = parse_macro_block
158
- when "define", "define-syntax", "define-values", "define-record-type"
159
- node = parse_definition
160
- when "include", "include-ci"
161
- node = parse_includer
162
- when *DERIVED_IDENTIFIERS
163
- node = parse_derived_expression
164
- end
165
- end
166
- node || parse_procedure_call
167
- end
168
-
169
- def parse_procedure_call
170
- proc_call_node = AST.instantiate(:ast_procedure_call, nil)
171
- proc_call_node.operator = parse_operator
172
- Kernel.loop {
173
- break if @lexer.peek_token.type == :rparen
174
- proc_call_node.add_operand(parse_operand)
175
- }
176
- skip_rparen
177
- proc_call_node
178
- end
179
-
180
- def parse_operator
181
- parse_expression
182
- end
183
-
184
- def parse_operand
185
- parse_expression
186
- end
187
-
188
- def parse_lambda_expression
189
- @lexer.skip_token # skip :lambda
190
- lambda_node = make_lambda_expression_node(parse_formals, parse_body)
191
- skip_rparen
192
- lambda_node
193
- end
194
-
195
- def make_lambda_expression_node(formals, body)
196
- lambda_node = AST.instantiate(:ast_lambda_expression, nil)
197
- lambda_node.formals = formals
198
- lambda_node.body = body
199
- lambda_node
200
- end
201
-
202
- def parse_formals
203
- formals_node = nil
204
- # type 1: <identifier>
205
- # type 2: ( <identifier 1> <identifier 2> ... )
206
- # type 3: ( <identifier 1> <identifier 2> <dot> <identifier n> )
207
- # => not supported yet
208
- if @lexer.peek_token.type == :lparen
209
- formals_node = AST.instantiate(:ast_formals, nil)
210
- skip_lparen
211
- Kernel.loop {
212
- break if @lexer.peek_token.type == :rparen
213
- formals_node.add_identifier(parse_identifier)
214
- }
215
- else # type 1
216
- formals_node = parse_identifier
217
- end
218
- skip_rparen
219
- formals_node
220
- end
221
-
222
- def parse_body
223
- body = AST.instantiate(:ast_body, nil)
224
-
225
- definitions = AST.instantiate(:ast_internal_definitions, nil)
226
- body.definitions = definitions
227
-
228
- # read definitions
229
- Kernel.loop {
230
- break if @lexer.peek_token.type == :rparen # the end of lambda exp.
231
- break unless is_definition?
232
- body.definitions.add_definition(parse_expression)
233
- }
234
-
235
- body.sequence = parse_sequence
236
- body
237
- end
238
-
239
- def parse_sequence
240
- seq_node = AST.instantiate(:ast_sequence, nil)
241
- Kernel.loop {
242
- break if @lexer.peek_token.type == :rparen # the end of parent list
243
- exp = parse_expression
244
- seq_node.add_expression(exp)
245
- }
246
- seq_node
247
- end
248
-
249
- def parse_conditional
250
- if_node = AST.instantiate(:ast_conditional, @lexer.next_token.literal)
251
- if_node.test = parse_test
252
- if_node.consequent = parse_consequent
253
- if @lexer.peek_token.type != :rparen
254
- if_node.alternate = parse_alternate
255
- end
256
- skip_rparen
257
- if_node
258
- end
259
-
260
- def parse_test
261
- parse_expression
262
- end
263
-
264
- def parse_consequent
265
- parse_expression
266
- end
267
-
268
- def parse_alternate
269
- parse_expression
270
- end
271
-
272
- def parse_assignment
273
- assignment_node = AST.instantiate(:ast_assignment, @lexer.next_token.literal)
274
- assignment_node.identifier = parse_identifier
275
- assignment_node.expression = parse_expression
276
- skip_rparen
277
- assignment_node
278
- end
279
-
280
- def parse_macro_block
281
- not_implemented_yet("MACRO block")
282
- end
283
-
284
- DEFINITION_IDENTIFIERS = [
285
- "define",
286
- "define-syntax",
287
- "define-values",
288
- "define-record-type",
289
- ]
290
-
291
- def is_definition?
292
- if @lexer.peek_token.type == :lparen
293
- type, literal = *@lexer.peek_token(1)
294
- type == :identifier and DEFINITION_IDENTIFIERS.include?(literal)
295
- else
296
- false
297
- end
298
- end
299
-
300
- def parse_definition
301
- case @lexer.peek_token.literal
302
- when "define"
303
- parse_identifier_definition
304
- when "define-syntax"
305
- parse_define_syntax
306
- when "define-values"
307
- parse_define_values
308
- when "define-record-type"
309
- parse_define_record_type
310
- else
311
- raise SchemeSyntaxErrorError, @lexer.peek_token.literal
312
- end
313
- end
314
-
315
- def parse_identifier_definition
316
- # type 1: (define foo 3)
317
- # type 2: (define bar (lambda (x y) (+ x y)))
318
- # type 3: (define (hoge n m) (display n) (display m) (* n m))
319
- define_node = AST.instantiate(:ast_identifier_definition, @lexer.next_token.literal)
320
-
321
- case @lexer.peek_token.type
322
- when :identifier
323
- # type 1 and type 2
324
- define_node.identifier = parse_identifier
325
- define_node.expression = parse_expression
326
- skip_rparen
327
- when :lparen
328
- # type 3:
329
- # make a lambda expression, then handle as type 2
330
- skip_lparen
331
- define_node.identifier = parse_identifier
332
- def_formals_node = AST.instantiate(:ast_formals, nil)
333
- Kernel.loop {
334
- break if @lexer.peek_token.type == :rparen
335
- def_formals_node.add_identifier(parse_identifier)
336
- }
337
- skip_rparen
338
-
339
- lambda_node = make_lambda_expression_node(def_formals_node, parse_body)
340
- skip_rparen
341
-
342
- define_node.expression = lambda_node
343
- else
344
- raise SchemeSyntaxErrorError, @lexer.peek_token.literal
345
- end
346
-
347
- define_node
348
- end
349
-
350
- def parse_define_syntax
351
- not_implemented_yet("DEFINE-SYNTAX")
352
- end
353
-
354
- def parse_define_values
355
- not_implemented_yet("DEFINE-VALUES")
356
- end
357
-
358
- def parse_define_record_type
359
- not_implemented_yet("DEFINE-RECORD-TYPE")
360
- end
361
-
362
- def parse_includer
363
- not_implemented_yet("INCLUDE or INCLUDE-CI")
364
- end
365
-
366
- def parse_derived_expression
367
- literal = @lexer.next_token.literal
368
- name = compose_method_name("parse_", literal).intern
369
- if self.respond_to?(name)
370
- m = self.method(name)
371
- m.call
372
- else
373
- not_implemented_yet(literal)
15
+ module Utils
16
+ def ast?(obj)
17
+ obj.kind_of?(AST::Node)
374
18
  end
375
- end
376
-
377
- def parse_cond
378
- cond_node = AST.instantiate(:ast_cond, nil)
379
- Kernel.loop {
380
- break if @lexer.peek_token.type == :rparen
381
- cond_node.add_clause(parse_cond_clause)
382
- }
383
- skip_rparen
384
- cond_node
385
- end
386
19
 
387
- def parse_cond_clause
388
- skip_lparen
389
- # type 1: ( <test> )
390
- # type 2: ( <test> <sequence> )
391
- # type 3: ( <test> => <recipient> )
392
- # type 4: ( else <sequence> )
393
- if identifier_name?(@lexer.peek_token, "else")
394
- # type 4
395
- parse_else_clause
396
- else
397
- clause_node = nil
398
- test_node = parse_test
399
- if identifier_name?(@lexer.peek_token, "=>")
400
- # type 3
401
- @lexer.skip_token # skip "=>"
402
- clause_node = AST.instantiate(:ast_cond_recipient_clause, nil)
403
- clause_node.recipient = parse_recipient
404
- else
405
- # type 1 and 2
406
- clause_node = AST.instantiate(:ast_cond_clause, nil)
407
- clause_node.sequence = parse_sequence
408
- end
409
- skip_rparen
410
- clause_node.test = test_node
411
- clause_node
20
+ def ast_type?(obj, type)
21
+ ast?(obj) && obj.type == type
412
22
  end
413
- end
414
-
415
- def parse_else_clause
416
- @lexer.skip_token # skip "else"
417
- else_clause_node = AST.instantiate(:ast_else_clause, nil)
418
- else_clause_node.sequence = parse_sequence
419
- skip_rparen
420
- else_clause_node
421
- end
422
-
423
- def parse_else_recipient_clause
424
- @lexer.skip_token # skip "else"
425
- @lexer.skip_token # skip "=>"
426
- else_recipient_clause_node = AST.instantiate(:ast_else_recipient_clause, nil)
427
- else_recipient_clause_node = parse_recipient
428
- else_recipient_clause_node
429
- end
430
-
431
- def parse_recipient
432
- parse_expression
433
- end
434
-
435
- def parse_case
436
- not_implemented_yet("CASE")
437
- end
438
-
439
- def parse_and
440
- parse_logical_test("and")
441
- end
442
-
443
- def parse_or
444
- parse_logical_test("or")
445
- end
446
-
447
- def parse_logical_test(type)
448
- ast_type = "ast_#{type}".intern
449
- node = AST.instantiate(ast_type, nil)
450
- Kernel.loop {
451
- break if @lexer.peek_token.type == :rparen
452
- node << parse_test
453
- }
454
- skip_rparen
455
- node
456
- end
457
-
458
- def parse_when
459
- parse_test_and_sequence("when")
460
- end
461
-
462
- def parse_unless
463
- parse_test_and_sequence("unless")
464
- end
465
-
466
- def parse_test_and_sequence(type)
467
- ast_type = "ast_#{type}".intern
468
- node = AST.instantiate(ast_type, nil)
469
- node.test = parse_test
470
- node.sequence = parse_sequence
471
- skip_rparen
472
- node
473
- end
474
-
475
- def parse_let
476
- let_node = AST.instantiate(:ast_let, nil)
477
- if @lexer.peek_token.type == :identifier
478
- let_node.identifier = parse_identifier
479
- end
480
- let_node.bindings = parse_bindings
481
- let_node.body = parse_body
482
- skip_rparen
483
- let_node
484
- end
485
-
486
- def parse_bindings
487
- bindings_node = AST.instantiate(:ast_bindings, nil)
488
- skip_lparen
489
- Kernel.loop {
490
- break if @lexer.peek_token.type == :rparen
491
- bindings_node.add_bind_spec(parse_bind_spec)
492
- }
493
- skip_rparen
494
- bindings_node
495
- end
496
-
497
- def parse_bind_spec
498
- spec_node = AST.instantiate(:ast_bind_spec, nil)
499
- skip_lparen
500
- spec_node.identifier = parse_identifier
501
- spec_node.expression = parse_expression
502
- skip_rparen
503
- spec_node
504
- end
505
-
506
- def parse_let_star
507
- parse_let_base("let_star")
508
- end
509
-
510
- def parse_letrec
511
- parse_let_base("letrec")
512
- end
513
-
514
- def parse_letrec_star
515
- parse_let_base("letrec_star")
516
- end
517
-
518
- def parse_let_base(type)
519
- ast_type = "ast_#{type}".intern
520
- node = AST.instantiate(ast_type, nil)
521
- node.bindings = parse_bindings
522
- node.body = parse_body
523
- skip_rparen
524
- node
525
- end
526
-
527
- def parse_let_values
528
- not_implemented_yet("LET-VALUES")
529
- end
530
-
531
- def parse_let_star_values
532
- not_implemented_yet("LET*-VALUES")
533
- end
534
-
535
- def parse_begin
536
- begin_node = AST.instantiate(:ast_begin, nil)
537
- begin_node.sequence = parse_sequence
538
- skip_rparen
539
- begin_node
540
- end
541
23
 
542
- def parse_do
543
- do_node = AST.instantiate(:ast_do, nil)
544
- do_node.iteration_bindings = parse_iteration_bindings
545
- do_node.test_and_do_result = parse_test_and_do_result
546
- Kernel.loop {
547
- break if @lexer.peek_token.type == :rparen
548
- do_node.add_command(parse_expression)
549
- }
550
- skip_rparen
551
- do_node
552
- end
553
-
554
- def parse_iteration_bindings
555
- node = AST.instantiate(:ast_iteration_bindings, nil)
556
- skip_lparen
557
- Kernel.loop {
558
- break if @lexer.peek_token.type == :rparen
559
- node.add_iteration_spec(parse_iteration_spec)
560
- }
561
- skip_rparen
562
- node
563
- end
564
-
565
- def parse_iteration_spec
566
- spec_node = AST.instantiate(:ast_iteration_spec, nil)
567
- skip_lparen
568
- spec_node.identifier = parse_identifier
569
- spec_node.init = parse_init
570
- if @lexer.peek_token.type != :rparen
571
- spec_node.step = parse_step
24
+ def not_implemented_yet(feature)
25
+ raise NotImplementedYetError, feature
572
26
  end
573
- skip_rparen
574
- spec_node
575
27
  end
576
28
 
577
- def parse_init
578
- parse_expression
579
- end
29
+ require_relative "parser/phase1_parser"
30
+ require_relative "parser/phase2_parser"
580
31
 
581
- def parse_step
582
- parse_expression
583
- end
32
+ class Parser
33
+ include Utils
584
34
 
585
- def parse_test_and_do_result
586
- node = AST.instantiate(:ast_test_and_do_result, nil)
587
- skip_lparen
588
- node << parse_test
589
- Kernel.loop {
590
- break if @lexer.peek_token.type == :rparen
591
- node.add_expression(parse_expression)
592
- }
593
- skip_rparen
594
- node
595
- end
596
-
597
- def parse_delay
598
- not_implemented_yet("DELAY")
599
- end
600
-
601
- def parse_delay_force
602
- not_implemented_yet("DELAY-FORCE")
603
- end
604
-
605
- def parse_parameterize
606
- not_implemented_yet("PARAMETERIZE")
607
- end
608
-
609
- def parse_guard
610
- not_implemented_yet("GUARD")
611
- end
612
-
613
- def parse_case_lambda
614
- not_implemented_yet("CASE-LAMBDA")
615
- end
616
-
617
- private
618
-
619
- def skip_lparen
620
- if @lexer.peek_token.type == :lparen
621
- @lexer.skip_token
622
- else
623
- raise UnexpectedTokenTypeError.new(@lexer.peek_token.type, :lparen)
35
+ def self.version
36
+ Rubasteme::Parser.send(:version)
624
37
  end
625
- end
626
38
 
627
- def skip_rparen
628
- if @lexer.peek_token.type == :rparen
629
- @lexer.skip_token
630
- else
631
- raise MissingRightParenthesisError
39
+ def version
40
+ self.class.version
632
41
  end
633
- end
634
42
 
635
- def ast_simple_type(token_type)
636
- case token_type
637
- when :identifier
638
- :ast_identifier
639
- when :boolean, :character, :number, :string
640
- "ast_#{token_type}".intern
641
- when :dot
642
- :ast_dot
643
- else
644
- :ast_illegal
43
+ def initialize
44
+ @p1 = Phase1Parser.new
45
+ @p2 = Phase2Parser.new
645
46
  end
646
- end
647
47
 
648
- def ast_compound_type(token_type)
649
- case token_type
650
- when :vec_lparen
651
- :ast_vector
652
- else
653
- :ast_list
48
+ def parse(lexer)
49
+ return [] if lexer.nil?
50
+ ast_program = AST.instantiate(:ast_program)
51
+ Kernel.loop{ast_program << @p2.parse(@p1.parse(lexer))}
52
+ ast_program
654
53
  end
655
- end
656
-
657
- def identifier_name?(token, name)
658
- token.type == :identifier && token.literal == name
659
- end
660
-
661
- SCM_CHAR_TO_RB_MAP = {
662
- "*" => "_star",
663
- "-" => "_",
664
- }
665
-
666
- def compose_method_name(prefix, type_name)
667
- converted_name = type_name.gsub(/[*\-]/, SCM_CHAR_TO_RB_MAP)
668
- prefix + converted_name
669
- end
670
-
671
- def not_implemented_yet(feature)
672
- raise NotImplementedYetError, feature
673
- end
674
-
675
- end # end of Parser class
676
-
54
+ end # end of Parser class
55
+ end # end of Parser module
677
56
  end