toy_lang 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +21 -3
- data/Rakefile +5 -0
- data/lib/toy_lang/parser.rb +171 -0
- data/lib/toy_lang/scanner.rb +89 -0
- data/lib/toy_lang/token.rb +18 -0
- data/lib/toy_lang/version.rb +1 -1
- data/lib/toy_lang.rb +3 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/toy_lang/parser_spec.rb +43 -0
- data/spec/toy_lang/scanner_spec.rb +121 -0
- metadata +14 -5
data/README.md
CHANGED
@@ -1,6 +1,16 @@
|
|
1
1
|
# ToyLang
|
2
2
|
|
3
|
-
|
3
|
+
Parser for a simple language. An example program would be:
|
4
|
+
|
5
|
+
def fibbo(number) {
|
6
|
+
if number == 0 { return 0 }
|
7
|
+
if number == 1 { return 1 }
|
8
|
+
return fibbo(number-1) + fibbo(number-2)
|
9
|
+
}
|
10
|
+
|
11
|
+
fibbo(5)
|
12
|
+
|
13
|
+
This program should output 5
|
4
14
|
|
5
15
|
## Installation
|
6
16
|
|
@@ -17,8 +27,16 @@ Or install it yourself as:
|
|
17
27
|
$ gem install toy_lang
|
18
28
|
|
19
29
|
## Usage
|
20
|
-
|
21
|
-
|
30
|
+
|
31
|
+
# Create a parser
|
32
|
+
@parser = ToyLang::Parser.new
|
33
|
+
# Set the program
|
34
|
+
@parser.program = "methodname(1,3)"
|
35
|
+
# Generate the AST
|
36
|
+
puts @parser.statement
|
37
|
+
|
38
|
+
TODO: Find better names to avoid collision between 'program =' and
|
39
|
+
program
|
22
40
|
|
23
41
|
## Contributing
|
24
42
|
|
data/Rakefile
CHANGED
@@ -0,0 +1,171 @@
|
|
1
|
+
module ToyLang

  # This is the class that parses the toy language.
  #
  # Notation used in the grammar below:
  # * grammatical rules are lower cased (e.g. statement)
  # * tokens are upper case (e.g. COMMA)
  # * optional rules are surrounded by parentheses
  #
  # The toy language grammar is as follows
  #
  #   program =>
  #     statement*
  #   statement =>
  #     function_definition |
  #     conditional_expression |
  #     function_call |
  #     return_statement
  #   function_definition =>
  #     function_header OPEN_BLOCK expression* CLOSE_BLOCK
  #   function_header =>
  #     DEF IDENTIFIER OPEN_PARENTHESES argument_list CLOSE_PARENTHESES
  #   argument_list =>
  #     (IDENTIFIER ( COMMA IDENTIFIER)*)
  #   conditional_expression =>
  #     IF condition OPEN_BLOCK expression* CLOSE_BLOCK
  #   expression =>
  #     additive_expression
  #   additive_expression =>
  #     subtraction_expression PLUS subtraction_expression
  #   subtraction_expression =>
  #     primary_expression MINUS primary_expression
  #   primary_expression =>
  #     NUMBER
  #   function_call =>
  #     IDENTIFIER OPEN_PARENTHESES parameter_list CLOSE_PARENTHESES
  #   parameter_list =>
  #     (expression ( COMMA expression)*)
  #   return_statement =>
  #     RETURN expression
  #
  # An example program would be
  #
  #   def fibbo(number) {
  #     if number == 0 { return 0 }
  #     if number == 1 { return 1 }
  #     return fibbo(number-1) + fibbo(number-2)
  #   }
  #   fibbo(5)
  #
  # This program should output 5
  #
  # Only part of the grammar is implemented so far: function calls and
  # return statements whose expressions are plain numbers.
  #
  # Syntax errors are signalled with `throw :parser_exception`.
  class Parser

    # Sets the source text to parse. A fresh Scanner is created on every
    # assignment, so any previous scanning state is discarded.
    def program=(program)
      @scanner = Scanner.new
      @scanner.set_program(program)
    end

    # statement =>
    #   function_definition |
    #   conditional_expression |
    #   function_call |
    #   return_statement
    #
    # Tries each alternative in order and returns the AST (Abstract Syntax
    # Tree) of the first one that matches. Throws :parser_exception when
    # none of them does.
    def statement
      ast = function_definition || conditional_expression ||
            function_call || return_statement
      return ast if ast

      throw :parser_exception
    end

    # function_definition =>
    #   function_header OPEN_BLOCK expression* CLOSE_BLOCK
    #
    # Not implemented yet: never matches.
    def function_definition
      nil
    end

    # conditional_expression =>
    #   IF condition OPEN_BLOCK expression* CLOSE_BLOCK
    #
    # Not implemented yet: never matches.
    def conditional_expression
      nil
    end

    # function_call =>
    #   IDENTIFIER OPEN_PARENTHESES parameter_list CLOSE_PARENTHESES
    #
    # Returns nil (consuming nothing) when the upcoming tokens do not start a
    # function call, otherwise { function_call: name, params: [...] }.
    # Throws :parser_exception when the closing parenthesis is missing.
    def function_call
      return nil unless tokens_are?(:id, :open_parentheses)

      method_name = @scanner.get_next_token.content
      @scanner.get_next_token # open parentheses
      params = parameter_list

      throw :parser_exception if token_is_not?(:close_parentheses)
      @scanner.get_next_token # close parentheses

      { function_call: method_name, params: params }
    end

    # parameter_list =>
    #   (expression ( COMMA expression)*)
    #
    # Returns the list of parsed expressions, [] when the list is empty.
    # Throws :parser_exception when a COMMA is not followed by an expression.
    def parameter_list
      first = expression
      return [] if first.nil?

      expression_list = [first]
      while token_is?(:comma)
        @scanner.get_next_token # the comma
        expr = expression
        # The grammar requires an expression after every COMMA.
        throw :parser_exception if expr.nil?
        expression_list << expr
      end

      expression_list
    end

    # return_statement =>
    #   RETURN expression
    #
    # Returns nil (consuming nothing) when the next token is not RETURN,
    # otherwise { return: expression }.
    # Throws :parser_exception when RETURN is not followed by an expression.
    def return_statement
      return nil unless token_is?(:return)

      @scanner.get_next_token # the 'return' keyword
      value = expression
      throw :parser_exception if value.nil?

      { return: value }
    end

    # expression =>
    #   ....
    # !!! INCOMPLETE IMPLEMENTATION !!!
    # To get going, expression can only be a number
    # TODO: Do it for real
    #
    # Returns { number: text } for a NUMBER token, or nil without consuming
    # anything when the next token is not a number.
    def expression
      return nil if token_is_not?(:number)

      { number: @scanner.get_next_token.content }
    end

    private

    # True when the next token has the given symbol.
    def token_is?(token)
      tokens_are?(token)
    end

    # True when the next token does not have the given symbol.
    def token_is_not?(token)
      !token_is?(token)
    end

    # True when the upcoming tokens match the given symbols, in order.
    # Only looks ahead; no token is consumed.
    def tokens_are?(*tokens)
      tokens.each_with_index do |token, index|
        return false if @scanner.look_ahead(index + 1).is_not?(token)
      end
      true
    end

  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module ToyLang
  # Splits toy language source text into Tokens and offers look-ahead.
  #
  # Unrecognized input and looking ahead past the end of file are signalled
  # with `throw :scanner_exception`.
  class Scanner

    # Tokens the scanner generates
    #   :return            => for 'return' tokens
    #   :def               => for 'def' tokens
    #   :number            => for regexp '\d+'
    #   :id                => for '[a-z]+'
    #   :open_block        => for '{'
    #   :close_block       => for '}'
    #   :open_parentheses  => for '('
    #   :close_parentheses => for ')'
    #   :comma             => for ','
    #   :eof               => for end of file

    IDENTIFIER = /\A[a-z]+/
    WHITESPACE = /\A\s+/

    LANGUAGE_TOKENS = {
      number: /\A\d+/,
      open_block: /\A\{/,
      close_block: /\A\}/,
      open_parentheses: /\A\(/,
      close_parentheses: /\A\)/,
      comma: /\A,/
    }.freeze

    RESERVED_WORDS = %w[return def].freeze

    # Sets the source text to scan and resets the look-ahead buffer.
    #
    # The text is copied because scanning consumes it destructively; without
    # the copy the caller's string would be emptied, and frozen strings would
    # raise.
    def set_program(program)
      @program = program.dup
      @token_list = [] # used to keep tokens in look_aheads
    end

    # Returns the next token and consumes it. Tokens already buffered by
    # look_ahead are handed out first. At end of input a Token with symbol
    # :eof is returned.
    def get_next_token
      @token_list.empty? ? consume_token : @token_list.shift
    end

    # Returns the token `number_of_tokens` positions ahead (1 = next token)
    # without consuming it. Throws :scanner_exception when asked to look
    # beyond the :eof token.
    def look_ahead(number_of_tokens = 1)
      end_of_file_met = false
      while @token_list.size < number_of_tokens
        throw :scanner_exception if end_of_file_met
        token = consume_token
        @token_list << token
        end_of_file_met = token.is?(:eof)
      end
      @token_list[number_of_tokens - 1]
    end

    private

    # Consumes an identifier. Reserved words become a token named after the
    # word itself (:return, :def); anything else is an :id token.
    def identifier
      ident = consume(IDENTIFIER)
      symbol = RESERVED_WORDS.include?(ident) ? ident.to_sym : :id
      Token.new(symbol, ident)
    end

    # Scans one token from the front of the remaining program text.
    def consume_token
      clear_whitespace
      return Token.new(:eof) if @program.empty?
      return identifier if @program =~ IDENTIFIER

      # Check for language symbols
      LANGUAGE_TOKENS.each do |symbol, reg_exp|
        return Token.new(symbol, consume(reg_exp)) if @program =~ reg_exp
      end

      throw :scanner_exception # Unrecognized token
    end

    # Drops any leading whitespace from the remaining program text.
    def clear_whitespace
      consume(WHITESPACE)
    end

    # Removes the text matched by `regexp` from the remaining program and
    # returns it; returns nil and leaves the program untouched on no match.
    # Every pattern used here is anchored with \A, so only a prefix is removed.
    def consume(regexp)
      @program.slice!(regexp)
    end

  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ToyLang
  # A lexical token: its kind (+symbol+) together with the source text it
  # was built from (+content+, nil unless given).
  class Token
    attr_reader :symbol, :content

    def initialize(symbol, content = nil)
      @symbol, @content = symbol, content
    end

    # True when this token is of the given kind.
    def is?(symbol)
      @symbol == symbol
    end

    # True when this token is of any other kind than the given one.
    def is_not?(symbol)
      !is?(symbol)
    end
  end
end
|
data/lib/toy_lang/version.rb
CHANGED
data/lib/toy_lang.rb
CHANGED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'spec_helper'

describe ToyLang::Parser do

  before(:each) do
    @parser = ToyLang::Parser.new
  end

  # Hands +source+ to the parser and parses a single statement from it.
  def parse(source)
    @parser.program = source
    @parser.statement
  end

  describe "return statement" do
    it "parses" do
      parse("return 2").should == {return: { number: "2" }}
    end
  end

  describe "function call" do
    it "parses function" do
      parse("methodname(1,3)").should == { function_call: 'methodname',
                                           params: [ {number: "1"}, {number: "3"} ]}
    end

    # Malformed calls, keyed by what is wrong with them.
    {
      "no closing parentheses"             => "methodname(1,3",
      "no further expression after comma"  => "methodname(1,",
      "first expression empty"             => "methodname(,3)",
      "middle expression empty"            => "methodname(1,,3)"
    }.each do |problem, source|
      it "throws parser_exception when #{problem}" do
        expect { parse(source) }.to throw_symbol :parser_exception
      end
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'spec_helper'

describe ToyLang::Scanner do

  let(:scanner) { ToyLang::Scanner.new }

  # Assertion helpers: each one consumes the next token of the scanner.

  def assert_token_is(symbol)
    scanner.get_next_token.symbol.should be symbol
  end

  def assert_token_content_is(content)
    scanner.get_next_token.content.should == content
  end

  def assert_token_and_content_is(symbol, content)
    token = scanner.get_next_token
    token.symbol.should be symbol
    token.content.should == content
  end

  it "returns :eof when no tokens left" do
    scanner.set_program("")
    assert_token_is :eof
  end

  it "clears white spaces" do
    scanner.set_program(" \n\t")
    assert_token_is :eof
  end

  it "returns :return when token is 'return'" do
    scanner.set_program("return")
    assert_token_is :return
  end

  it "returns :def when token is 'def'" do
    scanner.set_program("def")
    assert_token_is :def
  end

  it "returns :id when token is not a reserved word" do
    scanner.set_program("classic")
    assert_token_is :id
  end

  it "returns token content when token is not a reserved word" do
    scanner.set_program("classic")
    assert_token_content_is "classic"
  end

  it "returns :number when token is digits" do
    scanner.set_program("9823")
    assert_token_is :number
  end

  it "returns content when token is digits" do
    scanner.set_program("9823")
    assert_token_content_is "9823"
  end

  it "returns :open_block when token is '{'" do
    scanner.set_program("{")
    assert_token_is :open_block
  end

  it "returns :close_block when token is '}'" do
    scanner.set_program("}")
    assert_token_is :close_block
  end

  it "returns :open_parentheses when token is '('" do
    scanner.set_program("(")
    assert_token_is :open_parentheses
  end

  it "returns :close_parentheses when token is ')'" do
    scanner.set_program(")")
    assert_token_is :close_parentheses
  end

  it "scans small program" do
    scanner.set_program """
      def method {
        return 9
      }
    """
    assert_token_is :def
    assert_token_and_content_is :id, "method"
    assert_token_is :open_block
    assert_token_is :return
    assert_token_and_content_is :number, "9"
    assert_token_is :close_block
    assert_token_is :eof
  end

  describe "look_ahead" do
    it "without parameters looks one ahead" do
      scanner.set_program("token")
      scanner.look_ahead.content.should == "token"
    end

    it "with parameter looks ahead 'n' tokens" do
      scanner.set_program("def method")
      scanner.look_ahead(2).content.should == "method"
    end

    it "does not consume token (e.g. get_next_token gets the next token)" do
      scanner.set_program("token")
      scanner.look_ahead
      scanner.get_next_token.content.should == "token"
    end

    it "looking ahead of :eof throws exception" do
      scanner.set_program("")
      expect { scanner.look_ahead(2) }.to throw_symbol :scanner_exception
    end
  end

end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toy_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -56,7 +56,13 @@ files:
|
|
56
56
|
- README.md
|
57
57
|
- Rakefile
|
58
58
|
- lib/toy_lang.rb
|
59
|
+
- lib/toy_lang/parser.rb
|
60
|
+
- lib/toy_lang/scanner.rb
|
61
|
+
- lib/toy_lang/token.rb
|
59
62
|
- lib/toy_lang/version.rb
|
63
|
+
- spec/spec_helper.rb
|
64
|
+
- spec/toy_lang/parser_spec.rb
|
65
|
+
- spec/toy_lang/scanner_spec.rb
|
60
66
|
- toy_lang.gemspec
|
61
67
|
homepage: ''
|
62
68
|
licenses: []
|
@@ -72,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
78
|
version: '0'
|
73
79
|
segments:
|
74
80
|
- 0
|
75
|
-
hash: -
|
81
|
+
hash: -3501641450620113713
|
76
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
83
|
none: false
|
78
84
|
requirements:
|
@@ -81,11 +87,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
87
|
version: '0'
|
82
88
|
segments:
|
83
89
|
- 0
|
84
|
-
hash: -
|
90
|
+
hash: -3501641450620113713
|
85
91
|
requirements: []
|
86
92
|
rubyforge_project:
|
87
93
|
rubygems_version: 1.8.24
|
88
94
|
signing_key:
|
89
95
|
specification_version: 3
|
90
96
|
summary: Toy Language parser and scanner to play with language compilation
|
91
|
-
test_files:
|
97
|
+
test_files:
|
98
|
+
- spec/spec_helper.rb
|
99
|
+
- spec/toy_lang/parser_spec.rb
|
100
|
+
- spec/toy_lang/scanner_spec.rb
|