skeem 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +1 -1
- data/CHANGELOG.md +5 -1
- data/lib/skeem/stoken.rb +22 -0
- data/lib/skeem/tokenizer.rb +118 -0
- data/lib/skeem/version.rb +1 -1
- data/skeem.gemspec +5 -1
- data/spec/skeem/tokenizer_spec.rb +37 -0
- data/spec/spec_helper.rb +6 -4
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 31d8e575d516bf4429e18814019ca54198bd1088
|
4
|
+
data.tar.gz: 35a3d39adf7748b70c07cc1553a1b35ec94cd108
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a440baab7b58725760e00511c68c879fed77a6fc168fbafd89f361fa3fc3b49ba69cc5901a77438e4eacbcb2942679b6247d67ae02dec8e44d4145f003ed8eff
|
7
|
+
data.tar.gz: 21457dcdd46a2cf2efc1ac4d184be50748429954f79de03480f1c6dd26094a08fda17c321cf231a7954bcac3c95593ff75a89238e0e1e97b23d69058a92ab68c
|
data/.rspec
CHANGED
data/CHANGELOG.md
CHANGED
data/lib/skeem/stoken.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rley' # Load the Rley gem
|
2
|
+
|
3
|
+
module Skeem
|
4
|
+
# Value object holding a location in the source text,
# expressed as a line and a column.
Position = Struct.new(:line, :column) do
  # @return [String] human-readable rendition, e.g. "line 3, column 7"
  def to_s
    format('line %<line>s, column %<column>s', line: line, column: column)
  end
end
|
9
|
+
|
10
|
+
# Specialization of the Rley token class.
# In addition to what the parent class keeps, it remembers the position
# (line, column) of the token in the source text.
class SToken < Rley::Lexical::Token
  # @return [Object] the position given at construction time
  #   (the Skeem tokenizer passes a Position struct)
  attr_reader :position

  # @param theLexeme [String] text of the token, forwarded to the parent class
  # @param aTerminal [Object] terminal symbol (or its name), forwarded to the
  #   parent class
  # @param aPosition [Object] where the lexeme was found in the input
  def initialize(theLexeme, aTerminal, aPosition)
    super(theLexeme, aTerminal)
    @position = aPosition
  end
end # class
|
20
|
+
end # module
|
21
|
+
|
22
|
+
# End of file
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# File: tokenizer.rb
|
2
|
+
# Tokenizer for Skeem language (a small subset of Scheme)
|
3
|
+
require 'strscan'
|
4
|
+
require_relative 'stoken'
|
5
|
+
|
6
|
+
module Skeem
|
7
|
+
# A tokenizer for the Skeem dialect.
# Responsibility: break Skeem input into a sequence of token objects.
# The tokenizer recognizes:
#   Identifiers (including the lone '+' and '-')
#   Boolean literals: #t, #f, #true, #false
#   Integer and real literals (decimal radix, optional sign)
#   String literals (double quote delimited)
#   Delimiters: parentheses '(', ')'
#   Quotation marks: apostrophe and backquote
class Tokenizer
  # @return [StringScanner] low-level scanner over the source text
  attr_reader(:scanner)
  # @return [Integer] current line number (1-based)
  attr_reader(:lineno)
  # @return [Integer] character offset at which the current line begins
  attr_reader(:line_start)

  # Mapping from single-character lexemes to their terminal names.
  LEXEME2NAME = {
    "'" => 'APOSTROPHE',
    '`' => 'BACKQUOTE',
    '(' => 'LPAREN',
    ')' => 'RPAREN'
  }.freeze

  # Raised when the input contains text that matches no known token.
  class ScanError < StandardError; end

  # Constructor. Initialize a tokenizer for Skeem.
  # @param source [String] Skeem text to tokenize.
  def initialize(source)
    @scanner = StringScanner.new(source)
    @lineno = 1
    @line_start = 0
  end

  # Scan the whole remaining input.
  # @return [Array<SToken>] the sequence of tokens found
  # @raise [ScanError] when an unknown token is met
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Recognize the next token, if any.
  # @return [SToken, nil] nil when only whitespace was left in the input
  # @raise [ScanError] when the upcoming text matches no token pattern
  def _next_token
    skip_whitespaces
    curr_ch = scanner.peek(1)
    return nil if curr_ch.nil? || curr_ch.empty?

    # Remember where the token starts (in characters, not bytes), so that
    # the column does not depend on how the lexeme is post-processed.
    start = scanner.charpos

    if LEXEME2NAME.key?(curr_ch)
      # Delimiters, quotation marks => single character token
      build_token(LEXEME2NAME[curr_ch], scanner.getch, start)
    elsif (lexeme = scanner.scan(/#(?:t|f|true|false)((?=\s|[|()";])|$)/))
      build_token('BOOLEAN', lexeme, start)
    elsif (lexeme = scanner.scan(/[+-]?[0-9]+((?=\s|[|()";])|$)/))
      build_token('INTEGER', lexeme, start) # Decimal radix, optional sign
    elsif (lexeme = scanner.scan(/[+-]?[0-9]+\.[0-9]+((?=\s|[|()";])|$)/))
      build_token('REAL', lexeme, start)
    elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/)) # Double quotes literal?
      # Strip only the two enclosing quotes; anchors like ^ and $ would also
      # match at line boundaries inside a multi-line literal.
      # NOTE: line breaks inside a literal do not update the line counter.
      build_token('STRING_LIT', lexeme[1..-2], start)
    elsif (lexeme = scanner.scan(/[+-]((?=\s|[|()";])|$)/))
      build_token('IDENTIFIER', lexeme, start) # Plus and minus as identifiers
    elsif (lexeme = scanner.scan(/[a-zA-Z!$%&*\/:<=>?@^_~][a-zA-Z0-9!$%&*+.\/:<=>?@^_~-]*/))
      # The hyphen is placed last in the character class so that it is a
      # literal '-' and not a range (a '+-.' range would also admit ',').
      build_token('IDENTIFIER', lexeme, start)
    else # Unknown token
      erroneous = scanner.getch
      sequel = scanner.scan(/.{1,20}/)
      erroneous += sequel unless sequel.nil?
      raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
    end
  end

  # Create a token object located at the given start offset.
  # @param aSymbolName [String] name of the terminal symbol
  # @param aLexeme [String] text of the token
  # @param aStartPos [Integer] character offset of the token's first character
  # @return [SToken]
  def build_token(aSymbolName, aLexeme, aStartPos)
    col = aStartPos - @line_start + 1
    pos = Position.new(@lineno, col)
    SToken.new(aLexeme, aSymbolName, pos)
  end

  # Advance the scanner past blanks and line breaks, keeping the line
  # number and the start-of-line offset up to date.
  def skip_whitespaces
    loop do
      ws_found = !scanner.skip(/[ \t\f]+/).nil?
      if scanner.skip(/(?:\r\n)|\r|\n/)
        ws_found = true
        @lineno += 1
        @line_start = scanner.charpos
      end
      break unless ws_found
    end
  end
end # class
|
117
|
+
end # module
|
118
|
+
# End of file
|
data/lib/skeem/version.rb
CHANGED
data/skeem.gemspec
CHANGED
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.authors = ['Dimitri Geshef']
|
40
40
|
spec.email = ['famished.tiger@yahoo.com']
|
41
41
|
|
42
|
-
spec.description
|
42
|
+
spec.description = <<-DESCR
|
43
43
|
Skeem is an interpreter of a subset of the Scheme programming language.
|
44
44
|
DESCR
|
45
45
|
spec.summary = <<-SUMMARY
|
@@ -53,6 +53,10 @@ SUMMARY
|
|
53
53
|
spec.require_paths = ['lib']
|
54
54
|
PkgExtending.pkg_files(spec)
|
55
55
|
PkgExtending.pkg_documentation(spec)
|
56
|
+
# Runtime dependencies
|
57
|
+
spec.add_dependency 'rley', '~> 0.6'
|
58
|
+
|
59
|
+
# Development dependencies
|
56
60
|
spec.add_development_dependency 'bundler', '~> 1.16'
|
57
61
|
spec.add_development_dependency 'rake', '~> 10.0'
|
58
62
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require_relative '../spec_helper' # Use the RSpec framework
|
2
|
+
require_relative '../../lib/skeem/tokenizer' # Load the class under test
|
3
|
+
|
4
|
+
module Skeem
|
5
|
+
describe Tokenizer do
  # Assert that the tokenizer yields exactly the expected tokens.
  # @param aTokenizer [Tokenizer] tokenizer primed with some input
  # @param theExpectations [Array<Array>] expected [terminal, lexeme] pairs
  def match_expectations(aTokenizer, theExpectations)
    tokens = aTokenizer.tokens
    # Without this check, a tokenizer producing too few tokens would pass.
    expect(tokens.size).to eq(theExpectations.size)
    tokens.zip(theExpectations).each do |token, (terminal, lexeme)|
      expect(token.terminal).to eq(terminal)
      expect(token.lexeme).to eq(lexeme)
    end
  end

  subject { Tokenizer.new('') }

  context 'Initialization:' do
    it 'should be initialized with a text to tokenize' do
      expect { Tokenizer.new('(+ 2 3)') }.not_to raise_error
    end

    it 'should have its scanner initialized' do
      expect(subject.scanner).to be_kind_of(StringScanner)
    end
  end # context

  # Sibling of the 'Initialization:' context rather than nested inside it
  context 'Delimiter and separator token recognition:' do
    it 'should tokenize single char delimiters' do
      subject.scanner.string = "( ) ' `"
      tokens = subject.tokens
      tokens.each { |token| expect(token).to be_kind_of(SToken) }
      terminals = tokens.map(&:terminal)
      prediction = %w[LPAREN RPAREN APOSTROPHE BACKQUOTE]
      expect(terminals).to eq(prediction)
    end
  end # context
end # describe
|
37
|
+
end # module
|
data/spec/spec_helper.rb
CHANGED
@@ -1,14 +1,16 @@
|
|
1
1
|
require 'bundler/setup'
|
2
|
-
require '
|
2
|
+
require 'rspec' # Use the RSpec framework
|
3
|
+
require_relative '../lib/skeem'
|
3
4
|
|
4
5
|
RSpec.configure do |config|
|
5
6
|
# Enable flags like --only-failures and --next-failure
|
6
7
|
config.example_status_persistence_file_path = '.rspec_status'
|
7
8
|
|
8
|
-
# Disable RSpec exposing methods globally on `Module` and `main`
|
9
|
-
config.disable_monkey_patching!
|
10
|
-
|
11
9
|
config.expect_with :rspec do |c|
|
10
|
+
# Disable the `should` syntax
|
12
11
|
c.syntax = :expect
|
13
12
|
end
|
13
|
+
|
14
|
+
# Display stack trace in case of failure
|
15
|
+
config.full_backtrace = true
|
14
16
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: skeem
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0
|
4
|
+
version: 0.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rley
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0.6'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0.6'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -71,8 +85,11 @@ files:
|
|
71
85
|
- Rakefile
|
72
86
|
- appveyor.yml
|
73
87
|
- lib/skeem.rb
|
88
|
+
- lib/skeem/stoken.rb
|
89
|
+
- lib/skeem/tokenizer.rb
|
74
90
|
- lib/skeem/version.rb
|
75
91
|
- skeem.gemspec
|
92
|
+
- spec/skeem/tokenizer_spec.rb
|
76
93
|
- spec/skeem_spec.rb
|
77
94
|
- spec/spec_helper.rb
|
78
95
|
homepage: https://github.com/famished-tiger/Skeem
|
@@ -102,4 +119,5 @@ specification_version: 4
|
|
102
119
|
summary: Skeem is an interpreter of a subset of the Scheme programming language. Scheme
|
103
120
|
is a descendent of the Lisp language.
|
104
121
|
test_files:
|
122
|
+
- spec/skeem/tokenizer_spec.rb
|
105
123
|
- spec/skeem_spec.rb
|