toy_lang 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +21 -3
- data/Rakefile +5 -0
- data/lib/toy_lang/parser.rb +171 -0
- data/lib/toy_lang/scanner.rb +89 -0
- data/lib/toy_lang/token.rb +18 -0
- data/lib/toy_lang/version.rb +1 -1
- data/lib/toy_lang.rb +3 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/toy_lang/parser_spec.rb +43 -0
- data/spec/toy_lang/scanner_spec.rb +121 -0
- metadata +14 -5
data/README.md
CHANGED
@@ -1,6 +1,16 @@
|
|
1
1
|
# ToyLang
|
2
2
|
|
3
|
-
|
3
|
+
Parser for a simple language. An example program would be:
|
4
|
+
|
5
|
+
def fibbo(number) {
|
6
|
+
if number == 0 { return 0 }
|
7
|
+
if number == 1 { return 1 }
|
8
|
+
return fibbo(number-1) + fibbo(number-2)
|
9
|
+
}
|
10
|
+
|
11
|
+
fibbo(5)
|
12
|
+
|
13
|
+
This program should output 5
|
4
14
|
|
5
15
|
## Installation
|
6
16
|
|
@@ -17,8 +27,16 @@ Or install it yourself as:
|
|
17
27
|
$ gem install toy_lang
|
18
28
|
|
19
29
|
## Usage
|
20
|
-
|
21
|
-
|
30
|
+
|
31
|
+
# Create a parser
|
32
|
+
@parser = ToyLang::Parser.new
|
33
|
+
# Set the program
|
34
|
+
@parser.program = "methodname(1,3)"
|
35
|
+
# Generate the AST
|
36
|
+
puts @parser.statement
|
37
|
+
|
38
|
+
TODO: Find better names to avoid collision between 'program =' and
|
39
|
+
program
|
22
40
|
|
23
41
|
## Contributing
|
24
42
|
|
data/Rakefile
CHANGED
@@ -0,0 +1,171 @@
|
|
1
|
+
module ToyLang
|
2
|
+
|
3
|
+
# This is the class that parses the toy language
|
4
|
+
# grammatical rules are lower cased (e.g. statement)
|
5
|
+
# tokens are upper case (e.g. COMMA)
|
6
|
+
# optional rules are surrounded by parentheses
|
7
|
+
#
|
8
|
+
# The toy language grammar is as follows
|
9
|
+
#
|
10
|
+
# program =>
|
11
|
+
# statement*
|
12
|
+
# statement =>
|
13
|
+
# function_definition |
|
14
|
+
# conditional_expression |
|
15
|
+
# function_call |
|
16
|
+
# return_statement
|
17
|
+
# function_definition =>
|
18
|
+
# function_header OPEN_BLOCK expression* CLOSE_BLOCK
|
19
|
+
# function_header =>
|
20
|
+
# DEF IDENTIFIER OPEN_PARENTHESES argument_list CLOSE_PARENTHESES
|
21
|
+
# argument_list =>
|
22
|
+
# (IDENTIFIER ( COMMA IDENTIFIER)*)
|
23
|
+
# conditional_expression =>
|
24
|
+
# IF condition OPEN_BLOCK expression* CLOSE_BLOCK
|
25
|
+
# expression =>
|
26
|
+
# additive_expression
|
27
|
+
# additive_expression =>
|
28
|
+
# substraction_expression PLUS substraction_expression
|
29
|
+
# substraction_expression =>
|
30
|
+
# primary_expresion MINUS primary_expresion
|
31
|
+
# primary_expresion =>
|
32
|
+
# NUMBER
|
33
|
+
# function_call =>
|
34
|
+
# IDENTIFIER OPEN_PARENTHESES parameter_list CLOSE_PARENTHESES
|
35
|
+
# parameter_list =>
|
36
|
+
# (expression ( COMMA expression)*)
|
37
|
+
# return_statement =>
|
38
|
+
# RETURN expression
|
39
|
+
#
|
40
|
+
# An example program would be
|
41
|
+
# def fibbo(number) {
|
42
|
+
# if number == 0 { return 0 }
|
43
|
+
# if number == 1 { return 1 }
|
44
|
+
# return fibbo(number-1) + fibbo(number-2)
|
45
|
+
# }
|
46
|
+
# fibbo(5)
|
47
|
+
#
|
48
|
+
# This program should output 5
|
49
|
+
class Parser
|
50
|
+
|
51
|
+
def program=(program)
|
52
|
+
@scanner = Scanner.new
|
53
|
+
@scanner.set_program(program)
|
54
|
+
end
|
55
|
+
|
56
|
+
# statement =>
|
57
|
+
# function_definition |
|
58
|
+
# conditional_expression |
|
59
|
+
# function_call |
|
60
|
+
# return_statement
|
61
|
+
def statement
|
62
|
+
# ast => Abstract Syntax Tree
|
63
|
+
if ((ast = function_definition) != nil)
|
64
|
+
return ast
|
65
|
+
elsif ((ast = conditional_expression) != nil)
|
66
|
+
return ast
|
67
|
+
elsif ((ast = function_call) != nil)
|
68
|
+
return ast
|
69
|
+
elsif ((ast = return_statement) != nil)
|
70
|
+
return ast
|
71
|
+
end
|
72
|
+
throw :parser_exception
|
73
|
+
end
|
74
|
+
|
75
|
+
# function_definition =>
|
76
|
+
# function_header OPEN_BLOCK expression* CLOSE_BLOCK
|
77
|
+
def function_definition
|
78
|
+
return nil
|
79
|
+
end
|
80
|
+
|
81
|
+
# conditional_expression =>
|
82
|
+
# IF condition OPEN_BLOCK expression* CLOSE_BLOCK
|
83
|
+
def conditional_expression
|
84
|
+
return nil
|
85
|
+
end
|
86
|
+
|
87
|
+
# function_call =>
|
88
|
+
# IDENTIFIER OPEN_PARENTHESES parameter_list CLOSE_PARENTHESES
|
89
|
+
def function_call
|
90
|
+
unless tokens_are?(:id, :open_parentheses)
|
91
|
+
return nil
|
92
|
+
end
|
93
|
+
|
94
|
+
method_name = @scanner.get_next_token.content
|
95
|
+
@scanner.get_next_token # open parentheses
|
96
|
+
params = parameter_list()
|
97
|
+
|
98
|
+
# Verify close parentheses
|
99
|
+
if token_is_not? :close_parentheses
|
100
|
+
throw :parser_exception
|
101
|
+
end
|
102
|
+
|
103
|
+
@scanner.get_next_token # close parentheses
|
104
|
+
|
105
|
+
return { function_call: method_name, params: params }
|
106
|
+
end
|
107
|
+
|
108
|
+
# parameter_list =>
#   (expression ( COMMA expression)*)
#
# Parses a possibly empty, comma separated list of expressions.
#
# Returns an Array with one AST node per parsed expression, or [] when the
# list does not start with an expression.
# Throws :parser_exception when a comma is not followed by an expression.
def parameter_list
  first = expression
  return [] if first.nil?

  expression_list = [first]

  while token_is? :comma
    @scanner.get_next_token # the comma
    expr = expression
    # A comma promises another expression; silently skipping a missing one
    # would accept malformed input such as "f(1,,3)".
    throw :parser_exception if expr.nil?
    expression_list << expr
  end

  expression_list
end
|
125
|
+
|
126
|
+
# return_statement =>
|
127
|
+
# RETURN expression
|
128
|
+
def return_statement
|
129
|
+
unless token_is? :return
|
130
|
+
return nil
|
131
|
+
end
|
132
|
+
|
133
|
+
@scanner.get_next_token
|
134
|
+
return {return: expression()}
|
135
|
+
end
|
136
|
+
|
137
|
+
# expression =>
#   ....
# !!! INCOMPLETE IMPLEMENTATION !!!
# To get going, expression can only be a number
# TODO: Do it for real
#
# Returns { number: <token content> } and consumes the token when the next
# token is a :number. Returns nil and consumes nothing otherwise.
def expression
  # The guard has to return: a bare `nil` inside the `if` is discarded, and
  # the non-number token would then be consumed and reported as a number.
  return nil if token_is_not? :number

  { number: @scanner.get_next_token.content }
end
|
150
|
+
|
151
|
+
private
|
152
|
+
|
153
|
+
def token_is?(token)
|
154
|
+
tokens_are?(token)
|
155
|
+
end
|
156
|
+
|
157
|
+
def token_is_not?(token)
|
158
|
+
not token_is? token
|
159
|
+
end
|
160
|
+
|
161
|
+
def tokens_are?(*tokens)
|
162
|
+
look_ahead_index = 1
|
163
|
+
tokens.each do |token|
|
164
|
+
return false if @scanner.look_ahead(look_ahead_index).is_not? token
|
165
|
+
look_ahead_index += 1
|
166
|
+
end
|
167
|
+
return true
|
168
|
+
end
|
169
|
+
|
170
|
+
end
|
171
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
module ToyLang
|
2
|
+
class Scanner
|
3
|
+
|
4
|
+
# Tokens the scanner generates
|
5
|
+
# :return => for 'return' tokens
|
6
|
+
# :def => for 'def' tokens
|
7
|
+
# :number => for regexp '\d+'
|
8
|
+
# :id => for '[a-z]+'
|
9
|
+
# :open_block => for '{'
|
10
|
+
# :close_block => for '}'
|
11
|
+
# :eof => for end of file
|
12
|
+
|
13
|
+
IDENTIFIER = /\A[a-z]+/
|
14
|
+
WHITESPACE = /\A\s+/
|
15
|
+
|
16
|
+
LANGUAGE_TOKENS = {
|
17
|
+
number: /\A\d+/,
|
18
|
+
open_block: /\A\{/,
|
19
|
+
close_block: /\A\}/,
|
20
|
+
open_parentheses: /\A\(/,
|
21
|
+
close_parentheses: /\A\)/,
|
22
|
+
comma: /\A,/
|
23
|
+
}
|
24
|
+
|
25
|
+
RESERVED_WORDS = %w[return def]
|
26
|
+
|
27
|
+
# Loads a new program into the scanner and empties the look-ahead buffer.
#
# program - String holding the source text to tokenize.
#
# The scanner removes text from @program in place while tokenizing, so it
# keeps a private copy: the caller's string is left untouched and frozen
# strings are accepted.
def set_program(program)
  @program = program.dup
  @token_list = [] # used to keep tokens in look_aheads
end
|
31
|
+
|
32
|
+
def get_next_token
|
33
|
+
if @token_list.empty?
|
34
|
+
consume_token
|
35
|
+
else
|
36
|
+
@token_list.shift
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns the token number_of_tokens positions ahead without consuming it.
# Tokens read on the way are buffered in @token_list so get_next_token hands
# them out later.
#
# number_of_tokens - how far to look ahead; 1 (the default) is the next token.
#
# Throws :scanner_exception when asked to look beyond the :eof token.
def look_ahead(number_of_tokens = 1)
  while @token_list.size < number_of_tokens
    # Nothing exists past :eof. Checking the buffer itself, rather than a
    # flag local to this call, also catches an :eof buffered by an earlier
    # look-ahead.
    last_token = @token_list.last
    throw :scanner_exception if last_token && last_token.is?(:eof)

    @token_list << consume_token
  end

  @token_list[number_of_tokens - 1]
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def identifier
|
54
|
+
ident = consume(IDENTIFIER)
|
55
|
+
# Check if the token is part of the reserved words
|
56
|
+
return Token.new(ident.to_sym, ident) if RESERVED_WORDS.include? ident
|
57
|
+
return Token.new(:id,ident)
|
58
|
+
end
|
59
|
+
|
60
|
+
def consume_token
|
61
|
+
clear_whitespace
|
62
|
+
if @program.size == 0
|
63
|
+
return Token.new(:eof)
|
64
|
+
elsif @program =~ IDENTIFIER
|
65
|
+
return identifier
|
66
|
+
end
|
67
|
+
|
68
|
+
# Check for language symbols
|
69
|
+
LANGUAGE_TOKENS.each do |symbol, reg_exp|
|
70
|
+
if @program =~ reg_exp
|
71
|
+
return Token.new(symbol, consume(reg_exp))
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
throw :scanner_exception # Unrecognized token
|
76
|
+
end
|
77
|
+
|
78
|
+
def clear_whitespace
|
79
|
+
consume(WHITESPACE)
|
80
|
+
end
|
81
|
+
|
82
|
+
# Removes the first match of regexp from the remaining program text and
# returns it. Returns nil and leaves the text untouched when nothing matches.
def consume(regexp)
  # slice! deletes exactly the match it returns; gsub! would also strip any
  # later matches of a pattern that is not anchored with \A.
  @program.slice!(regexp)
end
|
87
|
+
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ToyLang
|
2
|
+
class Token
|
3
|
+
attr_reader :symbol, :content
|
4
|
+
|
5
|
+
def initialize(symbol, content = nil)
|
6
|
+
@symbol = symbol
|
7
|
+
@content = content
|
8
|
+
end
|
9
|
+
|
10
|
+
def is?(symbol)
|
11
|
+
@symbol == symbol
|
12
|
+
end
|
13
|
+
|
14
|
+
def is_not?(symbol)
|
15
|
+
not is? symbol
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/toy_lang/version.rb
CHANGED
data/lib/toy_lang.rb
CHANGED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ToyLang::Parser do
|
4
|
+
|
5
|
+
before(:each) do
|
6
|
+
@parser = ToyLang::Parser.new
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "return statement" do
|
10
|
+
it "parses" do
|
11
|
+
@parser.program = "return 2"
|
12
|
+
@parser.statement.should == {return: { number: "2" }}
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
describe "function call" do
|
17
|
+
it "parses function" do
|
18
|
+
@parser.program = "methodname(1,3)"
|
19
|
+
@parser.statement.should == { function_call: 'methodname',
|
20
|
+
params: [ {number: "1"}, {number: "3"} ]}
|
21
|
+
end
|
22
|
+
|
23
|
+
it "throws parser_exception when no closing parentheses" do
|
24
|
+
@parser.program = "methodname(1,3"
|
25
|
+
expect { @parser.statement }.to throw_symbol :parser_exception
|
26
|
+
end
|
27
|
+
|
28
|
+
it "throws parser_exception when no further expression after comma" do
|
29
|
+
@parser.program = "methodname(1,"
|
30
|
+
expect { @parser.statement }.to throw_symbol :parser_exception
|
31
|
+
end
|
32
|
+
|
33
|
+
it "throws parser_exception when first expression empty" do
|
34
|
+
@parser.program = "methodname(,3)"
|
35
|
+
expect { @parser.statement }.to throw_symbol :parser_exception
|
36
|
+
end
|
37
|
+
|
38
|
+
it "throws parser_exception when middle expression empty" do
|
39
|
+
@parser.program = "methodname(1,,3)"
|
40
|
+
expect { @parser.statement }.to throw_symbol :parser_exception
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe ToyLang::Scanner do
|
4
|
+
|
5
|
+
before(:each) do
|
6
|
+
@scanner = ToyLang::Scanner.new
|
7
|
+
end
|
8
|
+
|
9
|
+
it "returns :eof when no tokens left" do
|
10
|
+
@scanner.set_program("")
|
11
|
+
assert_token_is :eof
|
12
|
+
end
|
13
|
+
|
14
|
+
it "clears white spaces" do
|
15
|
+
@scanner.set_program(" \n\t")
|
16
|
+
assert_token_is :eof
|
17
|
+
end
|
18
|
+
|
19
|
+
it "returns :return when token is 'return'" do
|
20
|
+
@scanner.set_program("return")
|
21
|
+
assert_token_is :return
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns :def when token is 'def'" do
|
25
|
+
@scanner.set_program("def")
|
26
|
+
assert_token_is :def
|
27
|
+
end
|
28
|
+
|
29
|
+
it "returns :id when token is not a reserved word" do
|
30
|
+
@scanner.set_program("classic")
|
31
|
+
assert_token_is :id
|
32
|
+
end
|
33
|
+
|
34
|
+
it "returns token content when token is not a reserved word" do
|
35
|
+
@scanner.set_program("classic")
|
36
|
+
assert_token_content_is "classic"
|
37
|
+
end
|
38
|
+
|
39
|
+
it "returns :number when token is digits" do
|
40
|
+
@scanner.set_program("9823")
|
41
|
+
assert_token_is :number
|
42
|
+
end
|
43
|
+
|
44
|
+
it "returns content when token is digits" do
|
45
|
+
@scanner.set_program("9823")
|
46
|
+
assert_token_content_is "9823"
|
47
|
+
end
|
48
|
+
|
49
|
+
it "returns :open_block when token is '{'" do
|
50
|
+
@scanner.set_program("{")
|
51
|
+
assert_token_is :open_block
|
52
|
+
end
|
53
|
+
|
54
|
+
it "returns :close_block when token is '}'" do
|
55
|
+
@scanner.set_program("}")
|
56
|
+
assert_token_is :close_block
|
57
|
+
end
|
58
|
+
|
59
|
+
it "returns :open_parentheses when token is '('" do
|
60
|
+
@scanner.set_program("(")
|
61
|
+
assert_token_is :open_parentheses
|
62
|
+
end
|
63
|
+
|
64
|
+
it "returns :close_parentheses when token is ')'" do
|
65
|
+
@scanner.set_program(")")
|
66
|
+
assert_token_is :close_parentheses
|
67
|
+
end
|
68
|
+
|
69
|
+
it "scans small program" do
|
70
|
+
@scanner.set_program """
|
71
|
+
def method {
|
72
|
+
return 9
|
73
|
+
}
|
74
|
+
"""
|
75
|
+
assert_token_is :def
|
76
|
+
assert_token_and_content_is :id, "method"
|
77
|
+
assert_token_is :open_block
|
78
|
+
assert_token_is :return
|
79
|
+
assert_token_and_content_is :number, "9"
|
80
|
+
assert_token_is :close_block
|
81
|
+
assert_token_is :eof
|
82
|
+
end
|
83
|
+
|
84
|
+
describe "look_ahead" do
|
85
|
+
it "without parameters looks one ahead" do
|
86
|
+
@scanner.set_program("token")
|
87
|
+
@scanner.look_ahead.content.should == "token"
|
88
|
+
end
|
89
|
+
|
90
|
+
it "with parameter looks ahead 'n' tokens" do
|
91
|
+
@scanner.set_program("def method")
|
92
|
+
@scanner.look_ahead(2).content.should == "method"
|
93
|
+
end
|
94
|
+
|
95
|
+
it "does not consume token (e.g. get_next_token gets the next token)" do
|
96
|
+
@scanner.set_program("token")
|
97
|
+
@scanner.look_ahead
|
98
|
+
@scanner.get_next_token.content.should == "token"
|
99
|
+
end
|
100
|
+
|
101
|
+
it "looking ahead of :eof throws exception" do
|
102
|
+
@scanner.set_program("")
|
103
|
+
expect { @scanner.look_ahead(2) }.to throw_symbol :scanner_exception
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def assert_token_content_is(content)
|
108
|
+
@scanner.get_next_token.content.should == content
|
109
|
+
end
|
110
|
+
|
111
|
+
def assert_token_is(symbol)
|
112
|
+
@scanner.get_next_token.symbol.should be symbol
|
113
|
+
end
|
114
|
+
|
115
|
+
def assert_token_and_content_is(symbol, content)
|
116
|
+
token = @scanner.get_next_token
|
117
|
+
token.symbol.should be symbol
|
118
|
+
token.content.should == content
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: toy_lang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rspec
|
@@ -56,7 +56,13 @@ files:
|
|
56
56
|
- README.md
|
57
57
|
- Rakefile
|
58
58
|
- lib/toy_lang.rb
|
59
|
+
- lib/toy_lang/parser.rb
|
60
|
+
- lib/toy_lang/scanner.rb
|
61
|
+
- lib/toy_lang/token.rb
|
59
62
|
- lib/toy_lang/version.rb
|
63
|
+
- spec/spec_helper.rb
|
64
|
+
- spec/toy_lang/parser_spec.rb
|
65
|
+
- spec/toy_lang/scanner_spec.rb
|
60
66
|
- toy_lang.gemspec
|
61
67
|
homepage: ''
|
62
68
|
licenses: []
|
@@ -72,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
72
78
|
version: '0'
|
73
79
|
segments:
|
74
80
|
- 0
|
75
|
-
hash: -
|
81
|
+
hash: -3501641450620113713
|
76
82
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
83
|
none: false
|
78
84
|
requirements:
|
@@ -81,11 +87,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
87
|
version: '0'
|
82
88
|
segments:
|
83
89
|
- 0
|
84
|
-
hash: -
|
90
|
+
hash: -3501641450620113713
|
85
91
|
requirements: []
|
86
92
|
rubyforge_project:
|
87
93
|
rubygems_version: 1.8.24
|
88
94
|
signing_key:
|
89
95
|
specification_version: 3
|
90
96
|
summary: Toy Language parser and scanner to play with language compilation
|
91
|
-
test_files:
|
97
|
+
test_files:
|
98
|
+
- spec/spec_helper.rb
|
99
|
+
- spec/toy_lang/parser_spec.rb
|
100
|
+
- spec/toy_lang/scanner_spec.rb
|