sweet-lang 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/sweet +2 -0
- data/lib/interpreter.rb +141 -0
- data/lib/lexer.rb +3 -0
- data/lib/lexer/core.rb +128 -0
- data/lib/lexer/strings.rb +13 -0
- data/lib/lexer/tags.rb +100 -0
- data/lib/nodes.rb +56 -0
- data/lib/parser.rb +673 -0
- data/lib/runtime.rb +7 -0
- data/lib/runtime/bootstrap.rb +41 -0
- data/lib/runtime/class.rb +43 -0
- data/lib/runtime/context.rb +24 -0
- data/lib/runtime/method.rb +19 -0
- data/lib/runtime/object.rb +18 -0
- data/lib/sweet.rb +28 -0
- metadata +64 -0
data/bin/sweet
ADDED
data/lib/interpreter.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
require "parser"
|
2
|
+
require "runtime"
|
3
|
+
|
4
|
+
# Entry point of the language: parses source text and evaluates the resulting
# tree, passing the Runtime constant as the evaluation context.
class Interpreter
  # Creates the parser once so it is reused by every call to #eval.
  def initialize
    @parser = Parser.new
  end

  # Parses +code+ and evaluates the parsed tree with Runtime as its context.
  # Returns whatever the tree's own eval returns.
  def eval(code)
    tree = @parser.parse(code)
    tree.eval(Runtime)
  end
end
|
13
|
+
|
14
|
+
class Nodes
  # Evaluates every child node in order against +context+ (the environment the
  # nodes run in) and returns the value produced by the last one. When there
  # are no nodes, or the last value is Ruby-falsy, Runtime["nil"] is returned
  # instead.
  def eval(context)
    last_value = nodes.inject(nil) { |_previous, node| node.eval(context) }
    last_value || Runtime["nil"]
  end
end
|
28
|
+
|
29
|
+
class NumberNode
  # Wraps the node's literal value in a new instance built by the runtime's
  # "Number" entry. The context is accepted for interface uniformity only.
  def eval(context)
    number_class = Runtime["Number"]
    number_class.new_with_value(value)
  end
end
|
36
|
+
|
37
|
+
class StringNode
  # Wraps the node's literal value in a new instance built by the runtime's
  # "String" entry. The context is accepted for interface uniformity only.
  def eval(context)
    string_class = Runtime["String"]
    string_class.new_with_value(value)
  end
end
|
42
|
+
|
43
|
+
class TrueNode
  # Evaluates to whatever the runtime registers under "true"; the context is
  # not consulted.
  def eval(_context)
    Runtime["true"]
  end
end
|
48
|
+
|
49
|
+
class FalseNode
  # Evaluates to whatever the runtime registers under "false"; the context is
  # not consulted.
  def eval(_context)
    Runtime["false"]
  end
end
|
54
|
+
|
55
|
+
class NilNode
  # Evaluates to whatever the runtime registers under "nil"; the context is
  # not consulted.
  def eval(_context)
    Runtime["nil"]
  end
end
|
60
|
+
|
61
|
+
class CallNode
  # Evaluates either a local variable read or a method call.
  #
  # A call with no receiver and no arguments whose name is bound in
  # context.locals is treated as a local variable access; this is what lets a
  # method be called without parentheses. Anything else is dispatched as a
  # method call on the evaluated receiver, or on context.current_self when no
  # receiver was given (so "print" behaves like "self.print").
  def eval(context)
    local_access = receiver.nil? && context.locals[method] && arguments.empty?
    return context.locals[method] if local_access

    target = receiver ? receiver.eval(context) : context.current_self
    evaluated_arguments = arguments.map { |argument| argument.eval(context) }
    target.call(method, evaluated_arguments)
  end
end
|
84
|
+
|
85
|
+
class GetConstantNode
  # Reads the constant called +name+ from the context via its [] accessor and
  # returns the result.
  def eval(context)
    constant_name = name
    context[constant_name]
  end
end
|
90
|
+
|
91
|
+
class SetConstantNode
  # Evaluates the value expression and stores the result in the context under
  # +name+ via its []= accessor. Returns the evaluated value.
  def eval(context)
    evaluated = value.eval(context)
    context[name] = evaluated
  end
end
|
96
|
+
|
97
|
+
class SetLocalNode
  # Evaluates the value expression and binds the result to +name+ in the
  # context's locals. Returns the evaluated value.
  def eval(context)
    locals = context.locals
    locals[name] = value.eval(context)
  end
end
|
102
|
+
|
103
|
+
class DefNode
  # Defines a method: builds a SweetMethod from the node's params and body and
  # registers it under +name+ in the runtime_methods of the context's current
  # class. Returns the new SweetMethod.
  def eval(context)
    SweetMethod.new(params, body).tap do |definition|
      context.current_class.runtime_methods[name] = definition
    end
  end
end
|
110
|
+
|
111
|
+
class ClassNode
  # Defines or reopens a class.
  #
  # The class is looked up by +name+ in the context; when nothing is found a
  # new SweetClass is created and registered under that name. The body is then
  # evaluated in a fresh Context built from the class (passed as both
  # arguments), so that methods defined with "def" inside the body are added
  # to this class. Returns the class.
  def eval(context)
    sweet_class = context[name] || (context[name] = SweetClass.new)
    body.eval(Context.new(sweet_class, sweet_class))
    sweet_class
  end
end
|
132
|
+
|
133
|
+
class IfNode
  # Evaluates the condition, converts it to a Ruby value through ruby_value,
  # and evaluates the body only when that value is truthy. Returns the body's
  # result, or Ruby nil when the body is skipped.
  def eval(context)
    verdict = condition.eval(context)
    return unless verdict.ruby_value

    body.eval(context)
  end
end
|
data/lib/lexer.rb
ADDED
data/lib/lexer/core.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# Converts source text into a flat list of [type, value] token pairs.
class Lexer
  # Identifiers emitted under their own upcased token type instead of
  # :IDENTIFIER, e.g. "if" becomes [:IF, "if"].
  KEYWORDS = ["def", "class", "if", "true", "false", "nil"].freeze

  # Passes +code+ through Tags#implement_tag and returns the rewritten source.
  def treat(code)
    Tags.new.implement_tag(code)
  end

  # Tokenizes +code+ and returns an array of [type, value] pairs.
  #
  # Token types are symbols (:IDENTIFIER, :CONSTANT, :NUMBER, :STRING, :INDENT,
  # :DEDENT, :NEWLINE, or an upcased keyword); operators and any other single
  # character use the matched text itself as the type. The argument is never
  # modified, so frozen strings are accepted.
  def tokenize(code)
    # Drop the trailing line break on a copy rather than in place: an in-place
    # chomp! would alter the caller's string and raise on frozen input.
    code = code.chomp

    # Strip trailing spaces, turn a leading space into a tab and drop lines
    # that contain only whitespace.
    code = code.gsub(/[ ]+\n/, "\n")
    code = code.gsub("\n ", "\n\t")
    code = code.gsub(/^\s*$/m, '')

    code = treat(code)

    # Current character position being scanned.
    i = 0

    # Collected tokens, each in the form [TOKEN_TYPE, value].
    tokens = []

    # Width of the indentation of the block currently open.
    current_indent = 0
    # Widths of every enclosing block that is still open, innermost last.
    indent_stack = []

    # Simple scanner: look at the remaining text, consume whatever matches
    # first and advance past it.
    while i < code.size
      chunk = code[i..-1]

      # Lower-case identifiers: keywords, method names and variable names.
      if (identifier = chunk[/\A([a-z]\w*)/, 1])
        if KEYWORDS.include?(identifier)
          tokens << [identifier.upcase.to_sym, identifier]
        else
          tokens << [:IDENTIFIER, identifier]
        end
        i += identifier.size

      # Class names and constants start with a capital letter.
      elsif (constant = chunk[/\A([A-Z]\w*)/, 1])
        tokens << [:CONSTANT, constant]
        i += constant.size

      elsif (number = chunk[/\A([0-9]+)/, 1])
        tokens << [:NUMBER, number.to_i]
        i += number.size

      elsif (string = chunk[/\A"(.*?)"/, 1])
        tokens << [:STRING, string]
        # +2 accounts for the two surrounding quotes.
        i += string.size + 2

      # Line break followed by tabs: compare the new width with the open block.
      elsif (indent = chunk[/\A\n(\t+)/m, 1])
        width = indent.size
        if width < current_indent
          # NOTE(review): only one level is closed per line break, even when
          # the new width is several levels shallower - confirm this is what
          # the parser expects.
          indent_stack.pop
          current_indent = indent_stack.last || 0
          tokens << [:DEDENT, width]
          tokens << [:NEWLINE, "\n"]
        elsif width == current_indent
          tokens << [:NEWLINE, "\n"]
        else
          current_indent = width
          indent_stack.push(current_indent)
          tokens << [:INDENT, width]
        end
        # +1 accounts for the line break itself.
        i += width + 1

      # Line break followed by zero or more spaces.
      elsif (indent = chunk[/\A\n( *)/m, 1])
        width = indent.size
        if width == current_indent
          # Still in the same block.
          tokens << [:NEWLINE, "\n"]
        elsif width < current_indent
          indent_stack.pop
          current_indent = indent_stack.last || 0
          tokens << [:DEDENT, width]
          tokens << [:NEWLINE, "\n"]
        end
        i += width + 1

      # Two-character operators; one-character operators fall through to the
      # catch-all branch below.
      elsif (operator = chunk[/\A(\|\||&&|==|!=|<=|>=)/, 1])
        tokens << [operator, operator]
        i += operator.size

      # Spaces between tokens carry no meaning.
      elsif chunk.match(/\A /)
        i += 1

      # Any other single character is its own token, e.g. ( ) , . ! + - <
      else
        value = chunk[0, 1]
        tokens << [value, value]
        i += 1
      end
    end

    # Close every block still open at the end of the input.
    while indent_stack.pop
      tokens << [:DEDENT, indent_stack.last || 0]
    end

    tokens
  end
end
|
data/lib/lexer/tags.rb
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# Rewrites tag shorthand found in the source into generated
# "def tag_N ... tag_N" snippets whose bodies print the corresponding markup.
class Tags

  # Tags emitted in self-closing form ("<tag ... />").
  # NOTE(review): "option" is listed here and in @@tags - confirm intended.
  @@inline_tags = [ "area", "img", "param", "input", "option", "base", "link",
    "meta", "hr", "br", "wbr" ]

  # Tags emitted as an opening/closing pair around their indented content.
  @@tags = ["DOCTYPE","a","abbr","acronym","address","applet","b",
    "bdo","big","blockquote","body","button","caption","center","cite","code",
    "colgroup","dd","del","dfn","dir","div","dl","dt","em","fieldset","font",
    "form","frameset","h1","h2","h3","h4","h5","h6","head","html","i","iframe",
    "ins","kbd","label","legend","li","map","menu","noframes","noscript",
    "object","ol","optgroup","option","p","pre","q","s","samp","script",
    "select","small","span","strike","strong","style","sub","sup","table",
    "tbody","td","textarea","tfoot","th","thead","title","tr","tt","u","ul",
    "var"]

  # Builds the generated method text for one tag block.
  #
  # code   - text of the matched block; tab-indented lines that start with an
  #          alphanumeric character become the body of a non-inline tag.
  # index  - number used in the generated method name (tag_<index>).
  # indent - nesting depth, used to compute the leading tabs.
  # tag    - tag name.
  # atr    - list of [name_with_equals, value] pairs.
  # inline - when true a self-closing tag is produced and +code+ is ignored.
  #
  # Returns the generated source as a String.
  def convert_tag(code, index, indent, tag, atr, inline=false)
    tab, p_tab = indentation_for(indent)
    attributes = convert_attr(atr).join(" ")

    if inline
      return "#{p_tab}def tag_#{index}\n#{tab}print(\"<#{tag} #{attributes} />\")\n#{p_tab}tag_#{index}"
    end

    content = code.scan(/(\t+[a-zA-Z0-9]+.*)/).map { |captures| captures[0] }
    opening = "def tag_#{index}\n#{tab}print(\"<#{tag} #{attributes}>\")"
    closing = "#{tab}print(\"</#{tag}>\")\n#{p_tab}tag_#{index}"
    [opening, *content, closing].join("\n")
  end

  # Turns [name_with_equals, value] pairs into strings of the form name='value'.
  def convert_attr(atr)
    atr.map { |pair| "#{pair[0]}'#{pair[1]}'" }
  end

  # Expands "#value" and ".value" shorthand that follows +tag+ into
  # "id=value" and "class=value". Returns the rewritten text; +code+ itself is
  # not modified.
  def adding_attributes(code, tag)
    [ [ "id", "#" ], [ "class", "\\." ] ].each do |attr_name, marker|
      # NOTE(review): "=-_" inside the character class is a range, not three
      # literal characters - confirm that is intended.
      found = code.scan(/#{tag} ([.a-zA-Z0-9=-_ ]+)?(#{marker}([a-zA-Z0-9]+))/)
      found.map { |captures| [captures[1], captures[2]] }.uniq.each do |shorthand, value|
        code = code.gsub(shorthand, "#{attr_name}=#{value}")
      end
    end
    code
  end

  # Replaces every recognised tag block in +code+ with its generated method
  # text and returns the result. Inline tags are handled first, then paired
  # tags; tags nested inside a paired tag are processed recursively with
  # +indent+ increased by one.
  def implement_tag(code, indent=1)
    @@inline_tags.each do |tag|
      general = code.scan(/(^(\t)+?(#{tag}(()$| )([ #.=_\-a-zA-Z0-9]+)?((\n\t.*)+)?).*)/).map { |x| x[0] }
      general.each_with_index do |block, index|
        # NOTE(review): the depth is always taken from the first match, not
        # from +block+ - confirm whether that is intended.
        new_indent = general[0].split("\t").count
        code_block = adding_attributes(block, tag)
        code_block = Strings.new.convert_string( code_block )
        atr = code_block.scan(/([a-zA-Z0-9_-]+=)'?"?([a-zA-Z0-9_-]+)'?"?/)
        code_block = convert_tag( code_block , index, new_indent, tag, atr, true)
        code = code.gsub(block, code_block)
      end
    end

    @@tags.each do |tag|
      general = code.scan(/(^(#{tag}(()$| )([ #.=_\-a-zA-Z0-9]+)?((\n\t.*)+)?).*)/).map { |x| x[0] }
      general.each_with_index do |block, index|
        code_block = adding_attributes(block, tag)
        @@tags.each do |r_tag|
          # Scanned once; an empty result simply skips the loop below.
          r_code = code_block.scan(/^(\t){#{indent}}(#{r_tag}(()$| )([a-zA-Z0-9= \t_\-#.\\'\/]+)?((\n\1{#{indent+1},})?(["#a= ()a-zA-Z0-9_.-\\'\/]+)?)+)/m)
          r_code.each do |r|
            new_block = implement_tag(r[1], indent + 1)
            code_block = code_block.gsub(r[1], new_block)
          end
        end
        code_block = Strings.new.convert_string( code_block )
        atr = code_block.scan(/([a-zA-Z0-9_-]+=)([a-zA-Z0-9_-]+)/)
        code_block = convert_tag( code_block , index, indent, tag, atr)
        code = code.gsub(block, code_block)
      end
    end

    code
  end

  private

  # Returns [body_tabs, outer_tabs] for a nesting depth: the body gets one tab
  # per level (at least one when the depth is zero), the outer prefix one tab
  # fewer. A negative depth yields two empty strings.
  def indentation_for(indent)
    return ["", ""] if indent < 0

    body_depth = indent.zero? ? 1 : indent
    outer_depth = indent.zero? ? 0 : indent - 1
    ["\t" * body_depth, "\t" * outer_depth]
  end

end
|
data/lib/nodes.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Ordered collection of nodes, each one representing an expression.
class Nodes < Struct.new(:nodes)
  # Appends +node+ to the collection and returns the receiver, so appends can
  # be chained.
  def <<(node)
    nodes.push(node)
    self
  end
end
|
8
|
+
|
9
|
+
# Literals are static values that have a direct Ruby representation, such as
# a string, a number, true, false or nil. The wrapped Ruby value is exposed
# through #value.
class LiteralNode < Struct.new(:value)
end

# Numeric literal; the value is supplied by the caller.
class NumberNode < LiteralNode
end

# String literal; the value is supplied by the caller.
class StringNode < LiteralNode
end

# The "true" literal; always wraps Ruby's true.
class TrueNode < LiteralNode
  def initialize
    super(true)
  end
end

# The "false" literal; always wraps Ruby's false.
class FalseNode < LiteralNode
  def initialize
    super(false)
  end
end

# The "nil" literal; always wraps Ruby's nil.
class NilNode < LiteralNode
  def initialize
    super(nil)
  end
end
|
29
|
+
|
30
|
+
# Node of a method call or a local variable access. It can take any of these
# forms:
#
#   method                                 # may also be a local variable
#   method(argument1, argument2)
#   receiver.method
#   receiver.method(argument1, argument2)
#
class CallNode < Struct.new(:receiver, :method, :arguments)
end
|
38
|
+
|
39
|
+
# Reads the value of the constant called +name+.
class GetConstantNode < Struct.new(:name)
end
|
41
|
+
|
42
|
+
# Assigns +value+ to the constant called +name+.
class SetConstantNode < Struct.new(:name, :value)
end
|
44
|
+
|
45
|
+
# Assigns +value+ to the local variable called +name+.
class SetLocalNode < Struct.new(:name, :value)
end
|
47
|
+
|
48
|
+
# Method definition: the method's name, its parameters and its body.
class DefNode < Struct.new(:name, :params, :body)
end
|
50
|
+
|
51
|
+
# Class definition: the class name and the body evaluated for it.
class ClassNode < Struct.new(:name, :body)
end
|
53
|
+
|
54
|
+
# The "if" control structure: a condition and the body to run when it holds.
# This node is the one to study when adding other control structures such as
# while, for or loop.
class IfNode < Struct.new(:condition, :body)
end
|