skeem 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/skeem/node_builder.rb +47 -0
- data/lib/skeem/s_expr_nodes.rb +86 -0
- data/lib/skeem/tokenizer.rb +66 -7
- data/lib/skeem/version.rb +1 -1
- data/spec/skeem/tokenizer_spec.rb +22 -20
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b820c8eb3683e1da04e9c28e41a4de2517894573
|
4
|
+
data.tar.gz: 31dfc0fa971d27e165a73653abb824fdc39d00b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 333f74805437ad0199755a3bdb6bb8ffe8197c5d988979cf0bf8ec95dfd751fcdc423dcaa38c26adb892b982f62505a8bcb25ce62a0dade1c026ab05de0b3a5d
|
7
|
+
data.tar.gz: cd5852bda6bb381855a2ef9a64e3d6875da172ede1c6f520d5b97c697e5a484096cc93cd9874a95311b866ff4b248841ed6d1fd0d802e30dc0e3be60e38d5d08
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## [0.0.4] - 2018-08-29
|
2
|
+
### Added
|
3
|
+
- File `s_expr_nodes.rb` with initial implementation of `SExprTerminalNode` classes.
|
4
|
+
|
5
|
+
### Changed
|
6
|
+
- Class `Tokenizer` converts literals into Ruby "native" objects
|
7
|
+
|
8
|
+
|
1
9
|
## [0.0.3] - 2018-08-25
|
2
10
|
### Added
|
3
11
|
- File `grammar.rb` with minimalist grammar.
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require_relative 'regex_repr'
|
3
|
+
|
4
|
+
module SrlRuby
|
5
|
+
# The purpose of an ASTBuilder is to build piece by piece an AST
|
6
|
+
# (Abstract Syntax Tree) from a sequence of input tokens and
|
7
|
+
# visit events produced by walking over a GFGParsing object.
|
8
|
+
# Uses the Builder GoF pattern.
|
9
|
+
# The Builder pattern creates a complex object
|
10
|
+
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
11
|
+
# nodes) and using a step by step approach.
|
12
|
+
class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
|
13
|
+
Terminal2NodeClass = {}.freeze
|
14
|
+
|
15
|
+
# Create a new AST builder instance.
|
16
|
+
# @param theTokens [Array<Token>] The sequence of input tokens.
|
17
|
+
def initialize(theTokens)
|
18
|
+
super(theTokens)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Notification that the parse tree construction is complete.
|
22
|
+
def done!
|
23
|
+
super
|
24
|
+
end
|
25
|
+
|
26
|
+
protected
|
27
|
+
|
28
|
+
def terminal2node
|
29
|
+
Terminal2NodeClass
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
# Overriding method.
|
34
|
+
# Factory method for creating a node object for the given
|
35
|
+
# input token.
|
36
|
+
# @param _production [Rley::Syntax::Production]
|
37
|
+
# @param _terminal [Rley::Syntax::Terminal] Terminal symbol associated with the token
|
38
|
+
# @param aTokenPosition [Integer] Position of token in the input stream
|
39
|
+
# @param aToken [Rley::Lexical::Token] The input token
|
40
|
+
def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
|
41
|
+
node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
|
42
|
+
|
43
|
+
return node
|
44
|
+
end
|
45
|
+
end # class
|
46
|
+
end # module
|
47
|
+
# End of file
|
@@ -0,0 +1,86 @@
|
|
1
|
+
# Classes that implement nodes of Abstract Syntax Trees (AST) representing
|
2
|
+
# Skeem parse results.
|
3
|
+
|
4
|
+
module Skeem
|
5
|
+
# Abstract class. Root of class hierarchy needed for Interpreter
|
6
|
+
# design pattern
|
7
|
+
SExprTerminalNode = Struct.new(:token, :value, :position) do
|
8
|
+
def initialize(aToken, aPosition)
|
9
|
+
self.token = aToken
|
10
|
+
self.position = aPosition
|
11
|
+
init_value(aToken.lexeme)
|
12
|
+
end
|
13
|
+
|
14
|
+
# This method can be overridden
|
15
|
+
def init_value(aValue)
|
16
|
+
self.value = aValue.dup
|
17
|
+
end
|
18
|
+
|
19
|
+
def symbol()
|
20
|
+
token.terminal
|
21
|
+
end
|
22
|
+
|
23
|
+
def interpret()
|
24
|
+
return value
|
25
|
+
end
|
26
|
+
|
27
|
+
def done!()
|
28
|
+
# Do nothing
|
29
|
+
end
|
30
|
+
|
31
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
32
|
+
# @param aVisitor [ParseTreeVisitor] the visitor
|
33
|
+
def accept(aVisitor)
|
34
|
+
aVisitor.visit_terminal(self)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class SExprBooleanNode < SExprTerminalNode
|
39
|
+
end # class
|
40
|
+
|
41
|
+
class SExprNumberNode < SExprTerminalNode
|
42
|
+
|
43
|
+
class SExprRealNode < SExprNumberNode
|
44
|
+
end # class
|
45
|
+
|
46
|
+
class SExprIntegerNode < SExprRealNode
|
47
|
+
end # class
|
48
|
+
|
49
|
+
class SExprStringNode < SExprTerminalNode
|
50
|
+
end # class
|
51
|
+
|
52
|
+
=begin
|
53
|
+
class SExprCompositeNode
|
54
|
+
attr_accessor(:children)
|
55
|
+
attr_accessor(:symbol)
|
56
|
+
attr_accessor(:position)
|
57
|
+
|
58
|
+
def initialize(aSymbol, aPosition)
|
59
|
+
@symbol = aSymbol
|
60
|
+
@children = []
|
61
|
+
@position = aPosition
|
62
|
+
end
|
63
|
+
|
64
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
65
|
+
# @param aVisitor[ParseTreeVisitor] the visitor
|
66
|
+
def accept(aVisitor)
|
67
|
+
aVisitor.visit_nonterminal(self)
|
68
|
+
end
|
69
|
+
|
70
|
+
def done!()
|
71
|
+
# Do nothing
|
72
|
+
end
|
73
|
+
|
74
|
+
alias subnodes children
|
75
|
+
end # class
|
76
|
+
|
77
|
+
class SExprUnaryOpNode < SExprCompositeNode
|
78
|
+
def initialize(aSymbol, aPosition)
|
79
|
+
super(aSymbol, aPosition)
|
80
|
+
end
|
81
|
+
|
82
|
+
alias members children
|
83
|
+
end # class
|
84
|
+
=end
|
85
|
+
end # module
|
86
|
+
# End of file
|
data/lib/skeem/tokenizer.rb
CHANGED
@@ -24,12 +24,12 @@ module Skeem
|
|
24
24
|
'(' => 'LPAREN',
|
25
25
|
')' => 'RPAREN'
|
26
26
|
}.freeze
|
27
|
-
|
27
|
+
|
28
28
|
# Here are all the Scheme keywords (in uppercase)
|
29
29
|
@@keywords = %w[
|
30
30
|
BEGIN
|
31
31
|
DEFINE
|
32
|
-
].map { |x| [x, x] } .to_h
|
32
|
+
].map { |x| [x, x] } .to_h
|
33
33
|
|
34
34
|
class ScanError < StandardError; end
|
35
35
|
|
@@ -74,14 +74,13 @@ module Skeem
|
|
74
74
|
elsif (lexeme = scanner.scan(/#(?:\.)(?=\s|[|()";]|$)/)) # Single char occurring alone
|
75
75
|
token = build_token('PERIOD', lexeme)
|
76
76
|
elsif (lexeme = scanner.scan(/#(?:t|f|true|false)(?=\s|[|()";]|$)/))
|
77
|
-
token = build_token('BOOLEAN', lexeme)
|
77
|
+
token = build_token('BOOLEAN', lexeme)
|
78
78
|
elsif (lexeme = scanner.scan(/[+-]?[0-9]+(?=\s|[|()";]|$)/))
|
79
79
|
token = build_token('INTEGER', lexeme) # Decimal radix
|
80
80
|
elsif (lexeme = scanner.scan(/[+-]?[0-9]+\.[0-9]+(?:(?:e|E)[+-]?[0-9]+)?/))
|
81
81
|
token = build_token('REAL', lexeme)
|
82
82
|
elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
|
83
|
-
|
84
|
-
token = build_token('STRING_LIT', unquoted)
|
83
|
+
token = build_token('STRING_LIT', lexeme)
|
85
84
|
elsif (lexeme = scanner.scan(/[a-zA-Z!$%&*\/:<=>?@^_~][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
|
86
85
|
keyw = @@keywords[lexeme.upcase]
|
87
86
|
tok_type = keyw ? keyw : 'IDENTIFIER'
|
@@ -107,11 +106,12 @@ module Skeem
|
|
107
106
|
return token
|
108
107
|
end
|
109
108
|
|
110
|
-
def build_token(aSymbolName, aLexeme)
|
109
|
+
def build_token(aSymbolName, aLexeme, aFormat = :default)
|
111
110
|
begin
|
111
|
+
value = convert_to(aLexeme, aSymbolName, aFormat)
|
112
112
|
col = scanner.pos - aLexeme.size - @line_start + 1
|
113
113
|
pos = Position.new(@lineno, col)
|
114
|
-
token = SToken.new(
|
114
|
+
token = SToken.new(value, aSymbolName, pos)
|
115
115
|
rescue StandardError => exc
|
116
116
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
117
117
|
raise exc
|
@@ -120,6 +120,65 @@ module Skeem
|
|
120
120
|
return token
|
121
121
|
end
|
122
122
|
|
123
|
+
def convert_to(aLexeme, aSymbolName, aFormat)
|
124
|
+
case aSymbolName
|
125
|
+
when 'BOOLEAN'
|
126
|
+
value = to_boolean(aLexeme, aFormat)
|
127
|
+
when 'INTEGER'
|
128
|
+
value = to_integer(aLexeme, aFormat)
|
129
|
+
when 'REAL'
|
130
|
+
value = to_real(aLexeme, aFormat)
|
131
|
+
when 'STRING_LIT'
|
132
|
+
value = to_string(aLexeme, aFormat)
|
133
|
+
when 'SYMBOL'
|
134
|
+
value = to_string(aLexeme, aFormat)
|
135
|
+
else
|
136
|
+
value = aLexeme
|
137
|
+
end
|
138
|
+
|
139
|
+
return value
|
140
|
+
end
|
141
|
+
|
142
|
+
def to_boolean(aLexeme, aFormat)
|
143
|
+
result = (aLexeme =~ /^#t/) ? true : false
|
144
|
+
end
|
145
|
+
|
146
|
+
def to_integer(aLexeme, aFormat)
|
147
|
+
case aFormat
|
148
|
+
when :default, :base10
|
149
|
+
value = aLexeme.to_i
|
150
|
+
end
|
151
|
+
|
152
|
+
return value
|
153
|
+
end
|
154
|
+
|
155
|
+
def to_real(aLexeme, aFormat)
|
156
|
+
case aFormat
|
157
|
+
when :default
|
158
|
+
value = aLexeme.to_f
|
159
|
+
end
|
160
|
+
|
161
|
+
return value
|
162
|
+
end
|
163
|
+
|
164
|
+
def to_string(aLexeme, aFormat)
|
165
|
+
case aFormat
|
166
|
+
when :default
|
167
|
+
value = aLexeme.gsub(/(^")|("$)/, '')
|
168
|
+
end
|
169
|
+
|
170
|
+
return value
|
171
|
+
end
|
172
|
+
|
173
|
+
def to_symbol(aLexeme, aFormat)
|
174
|
+
case aFormat
|
175
|
+
when :default
|
176
|
+
value = aLexeme
|
177
|
+
end
|
178
|
+
|
179
|
+
return value
|
180
|
+
end
|
181
|
+
|
123
182
|
def skip_whitespaces
|
124
183
|
pre_pos = scanner.pos
|
125
184
|
|
data/lib/skeem/version.rb
CHANGED
@@ -43,10 +43,10 @@ module Skeem
|
|
43
43
|
it 'should tokenize boolean constants' do
|
44
44
|
tests = [
|
45
45
|
# couple [raw input, expected]
|
46
|
-
['#t',
|
47
|
-
[' #f',
|
48
|
-
['#true ',
|
49
|
-
[' #false',
|
46
|
+
['#t', true],
|
47
|
+
[' #f', false],
|
48
|
+
['#true ', true],
|
49
|
+
[' #false', false]
|
50
50
|
]
|
51
51
|
|
52
52
|
tests.each do |(input, prediction)|
|
@@ -62,11 +62,11 @@ module Skeem
|
|
62
62
|
it 'should tokenize integers in default radix 10' do
|
63
63
|
tests = [
|
64
64
|
# couple [raw input, expected]
|
65
|
-
['0',
|
66
|
-
[' 3',
|
67
|
-
['+3 ',
|
68
|
-
['-3',
|
69
|
-
['-1234',
|
65
|
+
['0', 0],
|
66
|
+
[' 3', 3],
|
67
|
+
['+3 ', +3],
|
68
|
+
['-3', -3],
|
69
|
+
['-1234', -1234]
|
70
70
|
]
|
71
71
|
|
72
72
|
tests.each do |(input, prediction)|
|
@@ -82,9 +82,9 @@ module Skeem
|
|
82
82
|
it 'should tokenize real numbers' do
|
83
83
|
tests = [
|
84
84
|
# couple [raw input, expected]
|
85
|
-
["\t\t3.45e+6",
|
86
|
-
['+3.45e+6',
|
87
|
-
['-3.45e+6',
|
85
|
+
["\t\t3.45e+6", 3.45e+6],
|
86
|
+
['+3.45e+6', +3.45e+6],
|
87
|
+
['-3.45e+6', -3.45e+6]
|
88
88
|
]
|
89
89
|
|
90
90
|
tests.each do |(input, prediction)|
|
@@ -96,6 +96,7 @@ module Skeem
|
|
96
96
|
end
|
97
97
|
end # context
|
98
98
|
|
99
|
+
# TODO
|
99
100
|
context 'String recognition:' do
|
100
101
|
it 'should tokenize strings' do
|
101
102
|
examples = [
|
@@ -114,13 +115,13 @@ module Skeem
|
|
114
115
|
end
|
115
116
|
end # context
|
116
117
|
|
117
|
-
|
118
|
-
For later:
|
119
|
-
"Another example:\ntwo lines of text"
|
120
|
-
"Here's text \
|
121
|
-
containing just one line"
|
122
|
-
"\x03B1; is named GREEK SMALL LETTER ALPHA."
|
123
|
-
|
118
|
+
|
119
|
+
# For later:
|
120
|
+
# "Another example:\ntwo lines of text"
|
121
|
+
# "Here's text \
|
122
|
+
# containing just one line"
|
123
|
+
# "\x03B1; is named GREEK SMALL LETTER ALPHA."
|
124
|
+
|
124
125
|
|
125
126
|
context 'Identifier recognition:' do
|
126
127
|
it 'should tokenize identifier' do
|
@@ -141,7 +142,7 @@ containing just one line"
|
|
141
142
|
end
|
142
143
|
end
|
143
144
|
end # context
|
144
|
-
|
145
|
+
=begin
|
145
146
|
context 'Scanning Scheme sample code' do
|
146
147
|
it 'should read examples from lis.py page' do
|
147
148
|
# Shallow tokenizer testing
|
@@ -201,5 +202,6 @@ SCHEME
|
|
201
202
|
match_expectations(subject, predicted)
|
202
203
|
end
|
203
204
|
end # context
|
205
|
+
=end
|
204
206
|
end # describe
|
205
207
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: skeem
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -86,7 +86,9 @@ files:
|
|
86
86
|
- appveyor.yml
|
87
87
|
- lib/skeem.rb
|
88
88
|
- lib/skeem/grammar.rb
|
89
|
+
- lib/skeem/node_builder.rb
|
89
90
|
- lib/skeem/parser.rb
|
91
|
+
- lib/skeem/s_expr_nodes.rb
|
90
92
|
- lib/skeem/stoken.rb
|
91
93
|
- lib/skeem/tokenizer.rb
|
92
94
|
- lib/skeem/version.rb
|