skeem 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/skeem/node_builder.rb +47 -0
- data/lib/skeem/s_expr_nodes.rb +86 -0
- data/lib/skeem/tokenizer.rb +66 -7
- data/lib/skeem/version.rb +1 -1
- data/spec/skeem/tokenizer_spec.rb +22 -20
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b820c8eb3683e1da04e9c28e41a4de2517894573
|
4
|
+
data.tar.gz: 31dfc0fa971d27e165a73653abb824fdc39d00b6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 333f74805437ad0199755a3bdb6bb8ffe8197c5d988979cf0bf8ec95dfd751fcdc423dcaa38c26adb892b982f62505a8bcb25ce62a0dade1c026ab05de0b3a5d
|
7
|
+
data.tar.gz: cd5852bda6bb381855a2ef9a64e3d6875da172ede1c6f520d5b97c697e5a484096cc93cd9874a95311b866ff4b248841ed6d1fd0d802e30dc0e3be60e38d5d08
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## [0.0.4] - 2018-08-29
|
2
|
+
### Added
|
3
|
+
- File `s_expr_nodes.rb` with initial implementation of `SExprTerminalNode` classes.
|
4
|
+
|
5
|
+
### Changed
|
6
|
+
- Class `Tokenizer` converts literals into Ruby "native" objects
|
7
|
+
|
8
|
+
|
1
9
|
## [0.0.3] - 2018-08-25
|
2
10
|
### Added
|
3
11
|
- File `grammar.rb` with minimalist grammar.
|
data/lib/skeem/node_builder.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require_relative 'regex_repr'
|
3
|
+
|
4
|
+
module SrlRuby
|
5
|
+
# The purpose of an ASTBuilder is to build, piece by piece, an AST
|
6
|
+
# (Abstract Syntax Tree) from a sequence of input tokens and
|
7
|
+
# visit events produced by walking over a GFGParsing object.
|
8
|
+
# Uses the Builder GoF pattern.
|
9
|
+
# The Builder pattern creates a complex object
|
10
|
+
# (say, a parse tree) from simpler objects (terminal and non-terminal
|
11
|
+
# nodes) and using a step by step approach.
|
12
|
+
class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
|
13
|
+
Terminal2NodeClass = {}.freeze
|
14
|
+
|
15
|
+
# Create a new AST builder instance.
|
16
|
+
# @param theTokens [Array<Token>] The sequence of input tokens.
|
17
|
+
def initialize(theTokens)
|
18
|
+
super(theTokens)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Notification that the parse tree construction is complete.
|
22
|
+
def done!
|
23
|
+
super
|
24
|
+
end
|
25
|
+
|
26
|
+
protected
|
27
|
+
|
28
|
+
def terminal2node
|
29
|
+
Terminal2NodeClass
|
30
|
+
end
|
31
|
+
|
32
|
+
|
33
|
+
# Overriding method.
|
34
|
+
# Factory method for creating a node object for the given
|
35
|
+
# input token.
|
36
|
+
# @param _production [Rley::Syntax::Production]
|
37
|
+
# @param _terminal [Rley::Syntax::Terminal] Terminal symbol associated with the token
|
38
|
+
# @param aTokenPosition [Integer] Position of token in the input stream
|
39
|
+
# @param aToken [Rley::Lexical::Token] The input token
|
40
|
+
def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
|
41
|
+
node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
|
42
|
+
|
43
|
+
return node
|
44
|
+
end
|
45
|
+
end # class
|
46
|
+
end # module
|
47
|
+
# End of file
|
data/lib/skeem/s_expr_nodes.rb
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
# Classes that implement nodes of Abstract Syntax Trees (AST) representing
|
2
|
+
# Skeem parse results.
|
3
|
+
|
4
|
+
module Skeem
|
5
|
+
# Abstract class. Root of class hierarchy needed for Interpreter
|
6
|
+
# design pattern
|
7
|
+
SExprTerminalNode = Struct.new(:token, :value, :position) do
|
8
|
+
def initialize(aToken, aPosition)
|
9
|
+
self.token = aToken
|
10
|
+
self.position = aPosition
|
11
|
+
init_value(aToken.lexeme)
|
12
|
+
end
|
13
|
+
|
14
|
+
# This method can be overridden
|
15
|
+
def init_value(aValue)
|
16
|
+
self.value = aValue.dup
|
17
|
+
end
|
18
|
+
|
19
|
+
def symbol()
|
20
|
+
token.terminal
|
21
|
+
end
|
22
|
+
|
23
|
+
def interpret()
|
24
|
+
return value
|
25
|
+
end
|
26
|
+
|
27
|
+
def done!()
|
28
|
+
# Do nothing
|
29
|
+
end
|
30
|
+
|
31
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
32
|
+
# @param aVisitor [ParseTreeVisitor] the visitor
|
33
|
+
def accept(aVisitor)
|
34
|
+
aVisitor.visit_terminal(self)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
class SExprBooleanNode < SExprTerminalNode
|
39
|
+
end # class
|
40
|
+
|
41
|
+
class SExprNumberNode < SExprTerminalNode
|
42
|
+
|
43
|
+
class SExprRealNode < SExprNumberNode
|
44
|
+
end # class
|
45
|
+
|
46
|
+
class SExprIntegerNode < SExprRealNode
|
47
|
+
end # class
|
48
|
+
|
49
|
+
class SExprStringNode < SExprTerminalNode
|
50
|
+
end # class
|
51
|
+
|
52
|
+
=begin
|
53
|
+
class SExprCompositeNode
|
54
|
+
attr_accessor(:children)
|
55
|
+
attr_accessor(:symbol)
|
56
|
+
attr_accessor(:position)
|
57
|
+
|
58
|
+
def initialize(aSymbol, aPosition)
|
59
|
+
@symbol = aSymbol
|
60
|
+
@children = []
|
61
|
+
@position = aPosition
|
62
|
+
end
|
63
|
+
|
64
|
+
# Part of the 'visitee' role in Visitor design pattern.
|
65
|
+
# @param aVisitor[ParseTreeVisitor] the visitor
|
66
|
+
def accept(aVisitor)
|
67
|
+
aVisitor.visit_nonterminal(self)
|
68
|
+
end
|
69
|
+
|
70
|
+
def done!()
|
71
|
+
# Do nothing
|
72
|
+
end
|
73
|
+
|
74
|
+
alias subnodes children
|
75
|
+
end # class
|
76
|
+
|
77
|
+
class SExprUnaryOpNode < SExprCompositeNode
|
78
|
+
def initialize(aSymbol, aPosition)
|
79
|
+
super(aSymbol, aPosition)
|
80
|
+
end
|
81
|
+
|
82
|
+
alias members children
|
83
|
+
end # class
|
84
|
+
=end
|
85
|
+
end # module
|
86
|
+
# End of file
|
data/lib/skeem/tokenizer.rb
CHANGED
@@ -24,12 +24,12 @@ module Skeem
|
|
24
24
|
'(' => 'LPAREN',
|
25
25
|
')' => 'RPAREN'
|
26
26
|
}.freeze
|
27
|
-
|
27
|
+
|
28
28
|
# Here are all the SRL keywords (in uppercase)
|
29
29
|
@@keywords = %w[
|
30
30
|
BEGIN
|
31
31
|
DEFINE
|
32
|
-
].map { |x| [x, x] } .to_h
|
32
|
+
].map { |x| [x, x] } .to_h
|
33
33
|
|
34
34
|
class ScanError < StandardError; end
|
35
35
|
|
@@ -74,14 +74,13 @@ module Skeem
|
|
74
74
|
elsif (lexeme = scanner.scan(/#(?:\.)(?=\s|[|()";]|$)/)) # Single char occurring alone
|
75
75
|
token = build_token('PERIOD', lexeme)
|
76
76
|
elsif (lexeme = scanner.scan(/#(?:t|f|true|false)(?=\s|[|()";]|$)/))
|
77
|
-
token = build_token('BOOLEAN', lexeme)
|
77
|
+
token = build_token('BOOLEAN', lexeme)
|
78
78
|
elsif (lexeme = scanner.scan(/[+-]?[0-9]+(?=\s|[|()";]|$)/))
|
79
79
|
token = build_token('INTEGER', lexeme) # Decimal radix
|
80
80
|
elsif (lexeme = scanner.scan(/[+-]?[0-9]+\.[0-9]+(?:(?:e|E)[+-]?[0-9]+)?/))
|
81
81
|
token = build_token('REAL', lexeme)
|
82
82
|
elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
|
83
|
-
|
84
|
-
token = build_token('STRING_LIT', unquoted)
|
83
|
+
token = build_token('STRING_LIT', lexeme)
|
85
84
|
elsif (lexeme = scanner.scan(/[a-zA-Z!$%&*\/:<=>?@^_~][a-zA-Z0-9!$%&*+-.\/:<=>?@^_~+-]*/))
|
86
85
|
keyw = @@keywords[lexeme.upcase]
|
87
86
|
tok_type = keyw ? keyw : 'IDENTIFIER'
|
@@ -107,11 +106,12 @@ module Skeem
|
|
107
106
|
return token
|
108
107
|
end
|
109
108
|
|
110
|
-
def build_token(aSymbolName, aLexeme)
|
109
|
+
def build_token(aSymbolName, aLexeme, aFormat = :default)
|
111
110
|
begin
|
111
|
+
value = convert_to(aLexeme, aSymbolName, aFormat)
|
112
112
|
col = scanner.pos - aLexeme.size - @line_start + 1
|
113
113
|
pos = Position.new(@lineno, col)
|
114
|
-
token = SToken.new(
|
114
|
+
token = SToken.new(value, aSymbolName, pos)
|
115
115
|
rescue StandardError => exc
|
116
116
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
117
117
|
raise exc
|
@@ -120,6 +120,65 @@ module Skeem
|
|
120
120
|
return token
|
121
121
|
end
|
122
122
|
|
123
|
+
def convert_to(aLexeme, aSymbolName, aFormat)
|
124
|
+
case aSymbolName
|
125
|
+
when 'BOOLEAN'
|
126
|
+
value = to_boolean(aLexeme, aFormat)
|
127
|
+
when 'INTEGER'
|
128
|
+
value = to_integer(aLexeme, aFormat)
|
129
|
+
when 'REAL'
|
130
|
+
value = to_real(aLexeme, aFormat)
|
131
|
+
when 'STRING_LIT'
|
132
|
+
value = to_string(aLexeme, aFormat)
|
133
|
+
when 'SYMBOL'
|
134
|
+
value = to_string(aLexeme, aFormat)
|
135
|
+
else
|
136
|
+
value = aLexeme
|
137
|
+
end
|
138
|
+
|
139
|
+
return value
|
140
|
+
end
|
141
|
+
|
142
|
+
def to_boolean(aLexeme, aFormat)
|
143
|
+
result = (aLexeme =~ /^#t/) ? true : false
|
144
|
+
end
|
145
|
+
|
146
|
+
def to_integer(aLexeme, aFormat)
|
147
|
+
case aFormat
|
148
|
+
when :default, :base10
|
149
|
+
value = aLexeme.to_i
|
150
|
+
end
|
151
|
+
|
152
|
+
return value
|
153
|
+
end
|
154
|
+
|
155
|
+
def to_real(aLexeme, aFormat)
|
156
|
+
case aFormat
|
157
|
+
when :default
|
158
|
+
value = aLexeme.to_f
|
159
|
+
end
|
160
|
+
|
161
|
+
return value
|
162
|
+
end
|
163
|
+
|
164
|
+
def to_string(aLexeme, aFormat)
|
165
|
+
case aFormat
|
166
|
+
when :default
|
167
|
+
value = aLexeme.gsub(/(^")|("$)/, '')
|
168
|
+
end
|
169
|
+
|
170
|
+
return value
|
171
|
+
end
|
172
|
+
|
173
|
+
def to_symbol(aLexeme, aFormat)
|
174
|
+
case aFormat
|
175
|
+
when :default
|
176
|
+
value = aLexeme
|
177
|
+
end
|
178
|
+
|
179
|
+
return value
|
180
|
+
end
|
181
|
+
|
123
182
|
def skip_whitespaces
|
124
183
|
pre_pos = scanner.pos
|
125
184
|
|
data/lib/skeem/version.rb
CHANGED
data/spec/skeem/tokenizer_spec.rb
CHANGED
@@ -43,10 +43,10 @@ module Skeem
|
|
43
43
|
it 'should tokenize boolean constants' do
|
44
44
|
tests = [
|
45
45
|
# couple [raw input, expected]
|
46
|
-
['#t',
|
47
|
-
[' #f',
|
48
|
-
['#true ',
|
49
|
-
[' #false',
|
46
|
+
['#t', true],
|
47
|
+
[' #f', false],
|
48
|
+
['#true ', true],
|
49
|
+
[' #false', false]
|
50
50
|
]
|
51
51
|
|
52
52
|
tests.each do |(input, prediction)|
|
@@ -62,11 +62,11 @@ module Skeem
|
|
62
62
|
it 'should tokenize integers in default radix 10' do
|
63
63
|
tests = [
|
64
64
|
# couple [raw input, expected]
|
65
|
-
['0',
|
66
|
-
[' 3',
|
67
|
-
['+3 ',
|
68
|
-
['-3',
|
69
|
-
['-1234',
|
65
|
+
['0', 0],
|
66
|
+
[' 3', 3],
|
67
|
+
['+3 ', +3],
|
68
|
+
['-3', -3],
|
69
|
+
['-1234', -1234]
|
70
70
|
]
|
71
71
|
|
72
72
|
tests.each do |(input, prediction)|
|
@@ -82,9 +82,9 @@ module Skeem
|
|
82
82
|
it 'should tokenize real numbers' do
|
83
83
|
tests = [
|
84
84
|
# couple [raw input, expected]
|
85
|
-
["\t\t3.45e+6",
|
86
|
-
['+3.45e+6',
|
87
|
-
['-3.45e+6',
|
85
|
+
["\t\t3.45e+6", 3.45e+6],
|
86
|
+
['+3.45e+6', +3.45e+6],
|
87
|
+
['-3.45e+6', -3.45e+6]
|
88
88
|
]
|
89
89
|
|
90
90
|
tests.each do |(input, prediction)|
|
@@ -96,6 +96,7 @@ module Skeem
|
|
96
96
|
end
|
97
97
|
end # context
|
98
98
|
|
99
|
+
# TODO
|
99
100
|
context 'String recognition:' do
|
100
101
|
it 'should tokenize strings' do
|
101
102
|
examples = [
|
@@ -114,13 +115,13 @@ module Skeem
|
|
114
115
|
end
|
115
116
|
end # context
|
116
117
|
|
117
|
-
|
118
|
-
For later:
|
119
|
-
"Another example:\ntwo lines of text"
|
120
|
-
"Here's text \
|
121
|
-
containing just one line"
|
122
|
-
"\x03B1; is named GREEK SMALL LETTER ALPHA."
|
123
|
-
|
118
|
+
|
119
|
+
# For later:
|
120
|
+
# "Another example:\ntwo lines of text"
|
121
|
+
# "Here's text \
|
122
|
+
# containing just one line"
|
123
|
+
# "\x03B1; is named GREEK SMALL LETTER ALPHA."
|
124
|
+
|
124
125
|
|
125
126
|
context 'Identifier recognition:' do
|
126
127
|
it 'should tokenize identifier' do
|
@@ -141,7 +142,7 @@ containing just one line"
|
|
141
142
|
end
|
142
143
|
end
|
143
144
|
end # context
|
144
|
-
|
145
|
+
=begin
|
145
146
|
context 'Scanning Scheme sample code' do
|
146
147
|
it 'should read examples from lis.py page' do
|
147
148
|
# Shallow tokenizer testing
|
@@ -201,5 +202,6 @@ SCHEME
|
|
201
202
|
match_expectations(subject, predicted)
|
202
203
|
end
|
203
204
|
end # context
|
205
|
+
=end
|
204
206
|
end # describe
|
205
207
|
end # module
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: skeem
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rley
|
@@ -86,7 +86,9 @@ files:
|
|
86
86
|
- appveyor.yml
|
87
87
|
- lib/skeem.rb
|
88
88
|
- lib/skeem/grammar.rb
|
89
|
+
- lib/skeem/node_builder.rb
|
89
90
|
- lib/skeem/parser.rb
|
91
|
+
- lib/skeem/s_expr_nodes.rb
|
90
92
|
- lib/skeem/stoken.rb
|
91
93
|
- lib/skeem/tokenizer.rb
|
92
94
|
- lib/skeem/version.rb
|