toy_lang 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,16 @@
1
1
  # ToyLang
2
2
 
3
- TODO: Write a gem description
3
+ Parser for a simple language. An example program would be:
4
+
5
+ def fibbo(number) {
6
+ if number == 0 { return 0 }
7
+ if number == 1 { return 1 }
8
+ return fibbo(number-1) + fibbo(number-2)
9
+ }
10
+
11
+ fibbo(5)
12
+
13
+ This program should output 5
4
14
 
5
15
  ## Installation
6
16
 
@@ -17,8 +27,16 @@ Or install it yourself as:
17
27
  $ gem install toy_lang
18
28
 
19
29
  ## Usage
20
-
21
- TODO: Write usage instructions here
30
+
31
+ # Create a parser
32
+ @parser = ToyLang::Parser.new
33
+ # Set the program
34
+ @parser.program = "methodname(1,3)"
35
+ # Generate the AST
36
+ puts @parser.program
37
+
38
+ TODO: Find better names to avoid collision between 'program =' and
39
+ program
22
40
 
23
41
  ## Contributing
24
42
 
data/Rakefile CHANGED
@@ -1,2 +1,7 @@
1
1
  #!/usr/bin/env rake
2
2
  require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new('spec')
6
+
7
+ task :default => :spec
@@ -0,0 +1,171 @@
1
+ module ToyLang
2
+
3
+ # This is the class that parses the toy language
4
+ # grammatical rules are lower cased (e.g. statement)
5
+ # tokens are upper case (e.g. COMMA)
6
+ # optional rules are surrounded by parentheses
7
+ #
8
+ # The toy language grammar is as follows
9
+ #
10
+ # program =>
11
+ # statement*
12
+ # statement =>
13
+ # function_definition |
14
+ # conditional_expression |
15
+ # function_call |
16
+ # return_statement
17
+ # function_definition =>
18
+ # function_header OPEN_BLOCK expression* CLOSE_BLOCK
19
+ # function_header =>
20
+ # DEF IDENTIFIER OPEN_PARENTHESES argument_list CLOSE_PARENTHESES
21
+ # argument_list =>
22
+ # (IDENTIFIER ( COMMA IDENTIFIER)*)
23
+ # conditional_expression =>
24
+ # IF condition OPEN_BLOCK expression* CLOSE_BLOCK
25
+ # expression =>
26
+ # additive_expression
27
+ # additive_expression =>
28
+ # subtraction_expression PLUS subtraction_expression
29
+ # subtraction_expression =>
30
+ # primary_expression MINUS primary_expression
31
+ # primary_expression =>
32
+ # NUMBER
33
+ # function_call =>
34
+ # IDENTIFIER OPEN_PARENTHESES parameter_list CLOSE_PARENTHESES
35
+ # parameter_list =>
36
+ # (expression ( COMMA expression)*)
37
+ # return_statement =>
38
+ # RETURN expression
39
+ #
40
+ # An example program would be
41
+ # def fibbo(number) {
42
+ # if number == 0 { return 0 }
43
+ # if number == 1 { return 1 }
44
+ # return fibbo(number-1) + fibbo(number-2)
45
+ # }
46
+ # fibbo(5)
47
+ #
48
+ # This program should output 5
49
class Parser

  # Loads the program to parse.
  #
  # Creates a fresh Scanner and hands +program+ to it; every parsing method
  # below reads its tokens from that scanner, so this must be called first.
  def program=(program)
    @scanner = Scanner.new
    @scanner.set_program(program)
  end

  # statement =>
  #   function_definition |
  #   conditional_expression |
  #   function_call |
  #   return_statement
  #
  # Tries each alternative in grammar order and returns the first AST
  # (Abstract Syntax Tree) node produced. Throws :parser_exception when none
  # of the alternatives matches.
  def statement
    function_definition ||
      conditional_expression ||
      function_call ||
      return_statement ||
      throw(:parser_exception)
  end

  # function_definition =>
  #   function_header OPEN_BLOCK expression* CLOSE_BLOCK
  #
  # Not implemented yet: always reports "no match" (nil).
  def function_definition
    nil
  end

  # conditional_expression =>
  #   IF condition OPEN_BLOCK expression* CLOSE_BLOCK
  #
  # Not implemented yet: always reports "no match" (nil).
  def conditional_expression
    nil
  end

  # function_call =>
  #   IDENTIFIER OPEN_PARENTHESES parameter_list CLOSE_PARENTHESES
  #
  # Returns nil (consuming nothing) unless the next two tokens are an
  # identifier followed by an opening parenthesis. Otherwise returns
  # { function_call: <name>, params: [<expression>, ...] } and throws
  # :parser_exception when the closing parenthesis is missing.
  def function_call
    return nil unless tokens_are?(:id, :open_parentheses)

    method_name = @scanner.get_next_token.content
    @scanner.get_next_token # open parentheses
    params = parameter_list

    # Anything other than a closing parenthesis here is a syntax error,
    # e.g. "name(1,3" or "name(,3)".
    throw :parser_exception if token_is_not?(:close_parentheses)
    @scanner.get_next_token # close parentheses

    { function_call: method_name, params: params }
  end

  # parameter_list =>
  #   (expression ( COMMA expression)*)
  #
  # Returns the (possibly empty) array of parsed expressions. Throws
  # :parser_exception when a comma is not followed by an expression
  # ("1,,3" or a trailing "1,"), because silently skipping the hole would
  # accept malformed input.
  def parameter_list
    first = expression
    return [] if first.nil?

    expression_list = [first]
    while token_is?(:comma)
      @scanner.get_next_token # the comma
      expr = expression
      throw :parser_exception if expr.nil?
      expression_list << expr
    end

    expression_list
  end

  # return_statement =>
  #   RETURN expression
  #
  # Returns nil (consuming nothing) when the next token is not :return.
  # Otherwise returns { return: <expression> }; throws :parser_exception
  # when no expression follows the keyword.
  def return_statement
    return nil unless token_is?(:return)

    @scanner.get_next_token # the return keyword
    value = expression
    throw :parser_exception if value.nil?

    { return: value }
  end

  # expression =>
  #   ....
  # !!! INCOMPLETE IMPLEMENTATION !!!
  # To get going, expression can only be a number
  # TODO: Do it for real
  #
  # Returns { number: <text> } and consumes the token when the next token
  # is a :number. Returns nil and leaves the token stream untouched
  # otherwise, so callers can decide how to treat the missing expression.
  def expression
    return nil if token_is_not?(:number)

    { number: @scanner.get_next_token.content }
  end

  private

  # True when the next token (not consumed) has the given symbol.
  def token_is?(token)
    tokens_are?(token)
  end

  # Negation of token_is?.
  def token_is_not?(token)
    !token_is?(token)
  end

  # True when the upcoming tokens match +tokens+ in order. Only peeks via
  # the scanner's look_ahead (1-based), and stops at the first mismatch so
  # it never looks further ahead than necessary.
  def tokens_are?(*tokens)
    tokens.each_with_index do |token, index|
      return false if @scanner.look_ahead(index + 1).is_not?(token)
    end
    true
  end

end
171
+ end
@@ -0,0 +1,89 @@
1
+ module ToyLang
2
class Scanner

  # Splits a toy-language program into Token objects.
  #
  # Tokens the scanner generates
  # :return            => for 'return' tokens
  # :def               => for 'def' tokens
  # :number            => for regexp '\d+'
  # :id                => for '[a-z]+'
  # :open_block        => for '{'
  # :close_block       => for '}'
  # :open_parentheses  => for '('
  # :close_parentheses => for ')'
  # :comma             => for ','
  # :eof               => for end of file
  #
  # Anything else makes the scanner throw :scanner_exception.

  IDENTIFIER = /\A[a-z]+/
  WHITESPACE = /\A\s+/

  LANGUAGE_TOKENS = {
    number: /\A\d+/,
    open_block: /\A\{/,
    close_block: /\A\}/,
    open_parentheses: /\A\(/,
    close_parentheses: /\A\)/,
    comma: /\A,/
  }.freeze

  RESERVED_WORDS = %w[return def].freeze

  # Sets the text to scan and resets the look-ahead buffer.
  #
  # The text is copied because scanning consumes it destructively; working on
  # a private copy leaves the caller's string untouched and also allows
  # frozen strings to be scanned.
  def set_program(program)
    @program = program.dup
    @token_list = [] # used to keep tokens in look_aheads
  end

  # Returns the next token and consumes it. Tokens already buffered by
  # look_ahead are handed out first. Once the input is exhausted every call
  # returns an :eof token.
  def get_next_token
    return consume_token if @token_list.empty?

    @token_list.shift
  end

  # Returns the token +number_of_tokens+ positions ahead (1 = next token)
  # without consuming it. Throws :scanner_exception when asked to look
  # past the :eof token.
  def look_ahead(number_of_tokens = 1)
    # Seed the flag from the buffer so that peeking past an :eof that was
    # buffered by an earlier call throws just like a single deep peek does.
    end_of_file_met = !@token_list.empty? && @token_list.last.is?(:eof)
    while @token_list.size < number_of_tokens
      throw :scanner_exception if end_of_file_met
      token = consume_token
      @token_list << token
      end_of_file_met = token.is?(:eof)
    end
    @token_list[number_of_tokens - 1]
  end

  private

  # Consumes an identifier from the input; reserved words get their own
  # symbol (:return, :def), everything else is an :id token.
  def identifier
    ident = consume(IDENTIFIER)
    return Token.new(ident.to_sym, ident) if RESERVED_WORDS.include?(ident)

    Token.new(:id, ident)
  end

  # Reads one token directly from the remaining input, skipping leading
  # whitespace. Throws :scanner_exception on an unrecognized character.
  def consume_token
    clear_whitespace
    return Token.new(:eof) if @program.empty?
    return identifier if @program =~ IDENTIFIER

    # Check for language symbols
    LANGUAGE_TOKENS.each do |symbol, reg_exp|
      return Token.new(symbol, consume(reg_exp)) if @program =~ reg_exp
    end

    throw :scanner_exception # Unrecognized token
  end

  # Drops any whitespace at the start of the remaining input.
  def clear_whitespace
    consume(WHITESPACE)
  end

  # Removes the text matched by +regexp+ from the remaining input and
  # returns it (nil when nothing matches). All scanner patterns are anchored
  # with \A, so only a prefix of the input is ever removed.
  def consume(regexp)
    @program.slice!(regexp)
  end

end
89
+ end
@@ -0,0 +1,18 @@
1
+ module ToyLang
2
+ class Token
3
+ attr_reader :symbol, :content
4
+
5
+ def initialize(symbol, content = nil)
6
+ @symbol = symbol
7
+ @content = content
8
+ end
9
+
10
+ def is?(symbol)
11
+ @symbol == symbol
12
+ end
13
+
14
+ def is_not?(symbol)
15
+ not is? symbol
16
+ end
17
+ end
18
+ end
@@ -1,3 +1,3 @@
1
1
  module ToyLang
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/toy_lang.rb CHANGED
@@ -1,4 +1,7 @@
1
1
  require "toy_lang/version"
2
+ require "toy_lang/scanner"
3
+ require "toy_lang/token"
4
+ require "toy_lang/parser"
2
5
 
3
6
  module ToyLang
4
7
  def self.description
@@ -0,0 +1,8 @@
1
+ require 'rspec'
2
+ require 'toy_lang'
3
+
4
+ RSpec.configure do |config|
5
+ config.color_enabled = true
6
+ config.formatter = 'documentation'
7
+ end
8
+
@@ -0,0 +1,43 @@
1
+ require 'spec_helper'
2
+
3
+ describe ToyLang::Parser do
4
+
5
+ before(:each) do
6
+ @parser = ToyLang::Parser.new
7
+ end
8
+
9
+ describe "return statement" do
10
+ it "parses" do
11
+ @parser.program = "return 2"
12
+ @parser.statement.should == {return: { number: "2" }}
13
+ end
14
+ end
15
+
16
+ describe "function call" do
17
+ it "parses function" do
18
+ @parser.program = "methodname(1,3)"
19
+ @parser.statement.should == { function_call: 'methodname',
20
+ params: [ {number: "1"}, {number: "3"} ]}
21
+ end
22
+
23
+ it "throws parser_exception when no closing parentheses" do
24
+ @parser.program = "methodname(1,3"
25
+ expect { @parser.statement }.to throw_symbol :parser_exception
26
+ end
27
+
28
+ it "throws parser_exception when no further expression after comma" do
29
+ @parser.program = "methodname(1,"
30
+ expect { @parser.statement }.to throw_symbol :parser_exception
31
+ end
32
+
33
+ it "throws parser_exception when first expression empty" do
34
+ @parser.program = "methodname(,3)"
35
+ expect { @parser.statement }.to throw_symbol :parser_exception
36
+ end
37
+
38
+ it "throws parser_exception when middle expression empty" do
39
+ @parser.program = "methodname(1,,3)"
40
+ expect { @parser.statement }.to throw_symbol :parser_exception
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,121 @@
1
+ require 'spec_helper'
2
+
3
+ describe ToyLang::Scanner do
4
+
5
+ before(:each) do
6
+ @scanner = ToyLang::Scanner.new
7
+ end
8
+
9
+ it "returns :eof when no tokens left" do
10
+ @scanner.set_program("")
11
+ assert_token_is :eof
12
+ end
13
+
14
+ it "clears white spaces" do
15
+ @scanner.set_program(" \n\t")
16
+ assert_token_is :eof
17
+ end
18
+
19
+ it "returns :return when token is 'return'" do
20
+ @scanner.set_program("return")
21
+ assert_token_is :return
22
+ end
23
+
24
+ it "returns :def when token is 'def'" do
25
+ @scanner.set_program("def")
26
+ assert_token_is :def
27
+ end
28
+
29
+ it "returns :id when token is not a reserved word" do
30
+ @scanner.set_program("classic")
31
+ assert_token_is :id
32
+ end
33
+
34
+ it "returns token content when token is not a reserved word" do
35
+ @scanner.set_program("classic")
36
+ assert_token_content_is "classic"
37
+ end
38
+
39
+ it "returns :number when token is digits" do
40
+ @scanner.set_program("9823")
41
+ assert_token_is :number
42
+ end
43
+
44
+ it "returns content when token is digits" do
45
+ @scanner.set_program("9823")
46
+ assert_token_content_is "9823"
47
+ end
48
+
49
+ it "returns :open_block when token is '{'" do
50
+ @scanner.set_program("{")
51
+ assert_token_is :open_block
52
+ end
53
+
54
+ it "returns :close_block when token is '}'" do
55
+ @scanner.set_program("}")
56
+ assert_token_is :close_block
57
+ end
58
+
59
+ it "returns :open_parentheses when token is '('" do
60
+ @scanner.set_program("(")
61
+ assert_token_is :open_parentheses
62
+ end
63
+
64
+ it "returns :close_parentheses when token is ')'" do
65
+ @scanner.set_program(")")
66
+ assert_token_is :close_parentheses
67
+ end
68
+
69
+ it "scans small program" do
70
+ @scanner.set_program """
71
+ def method {
72
+ return 9
73
+ }
74
+ """
75
+ assert_token_is :def
76
+ assert_token_and_content_is :id, "method"
77
+ assert_token_is :open_block
78
+ assert_token_is :return
79
+ assert_token_and_content_is :number, "9"
80
+ assert_token_is :close_block
81
+ assert_token_is :eof
82
+ end
83
+
84
+ describe "look_ahead" do
85
+ it "without parameters looks one ahead" do
86
+ @scanner.set_program("token")
87
+ @scanner.look_ahead.content.should == "token"
88
+ end
89
+
90
+ it "with parameter looks ahead 'n' tokens" do
91
+ @scanner.set_program("def method")
92
+ @scanner.look_ahead(2).content.should == "method"
93
+ end
94
+
95
+ it "does not consume token (e.g. get_next_token gets the next token)" do
96
+ @scanner.set_program("token")
97
+ @scanner.look_ahead
98
+ @scanner.get_next_token.content.should == "token"
99
+ end
100
+
101
+ it "looking ahead of :eof throws exception" do
102
+ @scanner.set_program("")
103
+ expect { @scanner.look_ahead(2) }.to throw_symbol :scanner_exception
104
+ end
105
+ end
106
+
107
+ def assert_token_content_is(content)
108
+ @scanner.get_next_token.content.should == content
109
+ end
110
+
111
+ def assert_token_is(symbol)
112
+ @scanner.get_next_token.symbol.should be symbol
113
+ end
114
+
115
+ def assert_token_and_content_is(symbol, content)
116
+ token = @scanner.get_next_token
117
+ token.symbol.should be symbol
118
+ token.content.should == content
119
+ end
120
+
121
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: toy_lang
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-22 00:00:00.000000000 Z
12
+ date: 2012-06-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
@@ -56,7 +56,13 @@ files:
56
56
  - README.md
57
57
  - Rakefile
58
58
  - lib/toy_lang.rb
59
+ - lib/toy_lang/parser.rb
60
+ - lib/toy_lang/scanner.rb
61
+ - lib/toy_lang/token.rb
59
62
  - lib/toy_lang/version.rb
63
+ - spec/spec_helper.rb
64
+ - spec/toy_lang/parser_spec.rb
65
+ - spec/toy_lang/scanner_spec.rb
60
66
  - toy_lang.gemspec
61
67
  homepage: ''
62
68
  licenses: []
@@ -72,7 +78,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
72
78
  version: '0'
73
79
  segments:
74
80
  - 0
75
- hash: -795914555458179842
81
+ hash: -3501641450620113713
76
82
  required_rubygems_version: !ruby/object:Gem::Requirement
77
83
  none: false
78
84
  requirements:
@@ -81,11 +87,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
81
87
  version: '0'
82
88
  segments:
83
89
  - 0
84
- hash: -795914555458179842
90
+ hash: -3501641450620113713
85
91
  requirements: []
86
92
  rubyforge_project:
87
93
  rubygems_version: 1.8.24
88
94
  signing_key:
89
95
  specification_version: 3
90
96
  summary: Toy Language parser and scanner to play with language compilation
91
- test_files: []
97
+ test_files:
98
+ - spec/spec_helper.rb
99
+ - spec/toy_lang/parser_spec.rb
100
+ - spec/toy_lang/scanner_spec.rb