loxxy 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+ require 'rley'
5
+ require_relative '../datatype/all_datatypes'
6
+ require_relative 'literal'
7
+
8
+ module Loxxy
9
+ module FrontEnd
10
+ # A scanner (tokenizer) for the Lox language.
11
+ # Reference material:
12
+ # https://craftinginterpreters.com/the-lox-language.html
13
+ # Section 4.2.1 Token types
14
+ # Appendix A1.2 Lexical Grammar
15
+ # Responsibility: break input into a sequence of token objects.
16
+ # The tokenizer should recognize:
17
+ # Identifiers,
18
+ # Number literals including single digit
19
+ # String literals (quote delimited)
20
+ # Delimiters: e.g. parentheses '(', ')'
21
+ # Separators: e.g. comma
22
+ class Scanner
23
+ # @return [StringScanner] Low-level input scanner
24
+ attr_reader(:scanner)
25
+
26
+ # @return [Integer] The current line number
27
+ attr_reader(:lineno)
28
+
29
+ # @return [Integer] Position of last start of line in the input
30
+ attr_reader(:line_start)
31
+
32
+ # One or two special character tokens.
33
+ # These are enumerated in section 4.2.1 Token type
34
+ @@lexeme2name = {
35
+ '(' => 'LEFT_PAREN',
36
+ ')' => 'RIGHT_PAREN',
37
+ '{' => 'LEFT_BRACE',
38
+ '}' => 'RIGHT_BRACE',
39
+ ',' => 'COMMA',
40
+ '.' => 'DOT',
41
+ '-' => 'MINUS',
42
+ '+' => 'PLUS',
43
+ ';' => 'SEMICOLON',
44
+ '/' => 'SLASH',
45
+ '*' => 'STAR',
46
+ '!' => 'BANG',
47
+ '!=' => 'BANG_EQUAL',
48
+ '=' => 'EQUAL',
49
+ '==' => 'EQUAL_EQUAL',
50
+ '>' => 'GREATER',
51
+ '>=' => 'GREATER_EQUAL',
52
+ '<' => 'LESS',
53
+ '<=' => 'LESS_EQUAL',
54
+ }.freeze
55
+
56
+ # Here are all the implemented Lox keywords (in uppercase)
57
+ # These are enumerated in section 4.2.1 Token type
58
+ @@keywords = %w[
59
+ AND CLASS ELSE FALSE FUN FOR IF NIL OR
60
+ PRINT RETURN SUPER THIS TRUE VAR WHILE
61
+ ].map { |x| [x, x] }.to_h
62
+
63
+ class ScanError < StandardError; end
64
+
65
+ # Constructor. Initialize a tokenizer for Lox input.
66
+ # @param source [String] Lox text to tokenize.
67
+ def initialize(source = nil)
68
+ @scanner = StringScanner.new('')
69
+ start_with(source) if source
70
+ end
71
+
72
+ # Reset the tokenizer and make the given text, the current input.
73
+ # @param source [String] Lox text to tokenize.
74
+ def start_with(source)
75
+ @scanner.string = source
76
+ @lineno = 1
77
+ @line_start = 0
78
+ end
79
+
80
+ # Scan the source and return an array of tokens.
81
+ # @return [Array<Rley::Lexical::Token>] | Returns a sequence of tokens
82
+ def tokens
83
+ tok_sequence = []
84
+ until @scanner.eos?
85
+ token = _next_token
86
+ tok_sequence << token unless token.nil?
87
+ end
88
+ tok_sequence << build_token('EOF', '')
89
+
90
+ return tok_sequence
91
+ end
92
+
93
+ private
94
+
95
+ def _next_token
96
+ skip_intertoken_spaces
97
+ curr_ch = scanner.peek(1)
98
+ return nil if curr_ch.nil? || curr_ch.empty?
99
+
100
+ token = nil
101
+
102
+ if "(){},.;/*".include? curr_ch
103
+ # Single delimiter or separator character
104
+ token = build_token(@@lexeme2name[curr_ch], scanner.getch)
105
+ elsif (lexeme = scanner.scan(/[+\-](?!\d)/))
106
+ # Minus or plus character not preceding a digit
107
+ token = build_token(@@lexeme2name[lexeme], lexeme)
108
+ elsif (lexeme = scanner.scan(/[!=><]=?/))
109
+ # One or two special character tokens
110
+ token = build_token(@@lexeme2name[lexeme], lexeme)
111
+ elsif (lexeme = scanner.scan(/-?\d+(?:\.\d+)?/))
112
+ token = build_token('NUMBER', lexeme)
113
+ elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/))
114
+ token = build_token('STRING', lexeme)
115
+ elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*/))
116
+ keyw = @@keywords[lexeme.upcase]
117
+ tok_type = keyw || 'IDENTIFIER'
118
+ token = build_token(tok_type, lexeme)
119
+ else # Unknown token
120
+ erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
121
+ sequel = scanner.scan(/.{1,20}/)
122
+ erroneous += sequel unless sequel.nil?
123
+ raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
124
+ end
125
+
126
+ return token
127
+ end
128
+
129
+ def build_token(aSymbolName, aLexeme)
130
+ begin
131
+ (value, symb) = convert_to(aLexeme, aSymbolName)
132
+ col = scanner.pos - aLexeme.size - @line_start + 1
133
+ pos = Rley::Lexical::Position.new(@lineno, col)
134
+ if value
135
+ token = Literal.new(value, aLexeme.dup, symb, pos)
136
+ else
137
+ token = Rley::Lexical::Token.new(aLexeme.dup, symb, pos)
138
+ end
139
+ rescue StandardError => e
140
+ puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
141
+ raise e
142
+ end
143
+
144
+ return token
145
+ end
146
+
147
+ def convert_to(aLexeme, aSymbolName)
148
+ symb = aSymbolName
149
+ case aSymbolName
150
+ when 'FALSE'
151
+ value = Datatype::False.instance
152
+ when 'NIL'
153
+ value = Datatype::Nil.instance
154
+ when 'NUMBER'
155
+ value = Datatype::Number.new(aLexeme)
156
+ when 'STRING'
157
+ value = Datatype::LXString.new(aLexeme)
158
+ when 'TRUE'
159
+ value = Datatype::True.instance
160
+ else
161
+ value = nil
162
+ end
163
+
164
+ return [value, symb]
165
+ end
166
+
167
+ # Skip non-significant whitespaces and comments.
168
+ # Advance the scanner until something significant is found.
169
+ def skip_intertoken_spaces
170
+ pre_pos = scanner.pos
171
+
172
+ loop do
173
+ ws_found = scanner.skip(/[ \t\f]+/) ? true : false
174
+ nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
175
+ if nl_found
176
+ ws_found = true
177
+ next_line
178
+ end
179
+ cmt_found = false
180
+ if scanner.scan(/\/(\/|\*)/)
181
+ cmt_found = true
182
+ case scanner.matched
183
+ when '//'
184
+ scanner.skip(/[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
185
+ next_line
186
+ when '/*'
187
+ skip_block_comment
188
+ next
189
+ end
190
+ end
191
+ break unless ws_found || cmt_found
192
+ end
193
+
194
+ curr_pos = scanner.pos
195
+ end
196
+
197
+ def skip_block_comment
198
+ nesting_level = 1
199
+ loop do
200
+ comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
201
+ unless comment_part
202
+ msg = "Unterminated '/* ... */' block comment on line #{lineno}"
203
+ raise ScanError, msg
204
+ end
205
+
206
+ case scanner.matched
207
+ when /(?:(?:\r\n)|\r|\n)/
208
+ next_line
209
+ when '*/'
210
+ nesting_level -= 1
211
+ break if nesting_level.zero?
212
+ when '/*'
213
+ nesting_level += 1
214
+ end
215
+ end
216
+ end
217
+
218
+ def next_line
219
+ @lineno += 1
220
+ @line_start = scanner.pos
221
+ end
222
+ end # class
223
+ end # module
224
+ end # module
225
+ # End of file
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Loxxy
  # The version number of the loxxy gem.
  VERSION = '0.0.3'
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'loxxy/version'
6
+
7
+ # Implementation module
8
module PkgExtending
  # Populate the file manifest of the given package specification.
  # @param aPackage [Gem::Specification] the spec to extend
  def self.pkg_files(aPackage)
    patterns = [
      '.rubocop.yml',
      '.rspec',
      '.travis.yml',
      '.yardopts',
      'Gemfile',
      'Rakefile',
      'CHANGELOG.md',
      'CODE_OF_CONDUCT.md',
      'LICENSE.txt',
      'README.md',
      'loxxy.gemspec',
      'bin/*.rb',
      'lib/*.*',
      'lib/**/*.rb',
      'spec/**/*.rb'
    ]
    aPackage.files = Dir.glob(patterns)
    aPackage.test_files = Dir.glob('spec/**/*_spec.rb')
    aPackage.require_path = 'lib'
  end

  # Configure RDoc generation options for the given package specification.
  # @param aPackage [Gem::Specification] the spec to extend
  def self.pkg_documentation(aPackage)
    aPackage.extra_rdoc_files = ['README.md']
    aPackage.rdoc_options << '--charset=UTF-8 --exclude="examples|spec"'
  end
end # module
37
+
38
Gem::Specification.new do |spec|
  spec.name = 'loxxy'
  spec.version = Loxxy::VERSION
  spec.authors = ['Dimitri Geshef']
  spec.email = ['famished.tiger@yahoo.com']
  spec.summary = 'An implementation of the Lox programming language. WIP'
  spec.description = 'An implementation of the Lox programming language. WIP'
  spec.homepage = 'https://github.com/famished-tiger/loxxy'
  spec.license = 'MIT'
  spec.required_ruby_version = '~> 2.4'

  # Populate spec.files BEFORE deriving the executables from it;
  # otherwise the grep below would run against the empty default file list.
  PkgExtending.pkg_files(spec)
  PkgExtending.pkg_documentation(spec)

  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'rley', '~> 0.7.06'

  # Development dependencies
  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../spec_helper' # Use the RSpec framework
4
+ require_relative '../../lib/loxxy/front_end/parser' # Load the class under test
5
+
6
+ module Loxxy
7
+ module FrontEnd
8
    describe Parser do
      subject { Parser.new }

      context 'Initialization:' do
        it 'should be initialized without argument' do
          expect { Parser.new }.not_to raise_error
        end

        it 'should have its parse engine initialized' do
          expect(subject.engine).to be_kind_of(Rley::Engine)
        end
      end # context

      context 'Parsing blank files:' do
        # Shared assertion: a blank input must still produce a tree that
        # complies with the grammar rule:
        #   program => declaration_star EOF
        # where the declaration_star MUST be empty
        def check_empty_input_result(aParseTree)
          expect(aParseTree.root.symbol.name).to eq('program')
          (decls, eof) = aParseTree.root.subnodes
          expect(decls).to be_kind_of(Rley::PTree::NonTerminalNode)
          expect(decls.symbol.name).to eq('declaration_star')
          expect(decls.subnodes).to be_empty
          expect(eof).to be_kind_of(Rley::PTree::TerminalNode)
          expect(eof.symbol.name).to eq('EOF')
        end

        it 'should cope with an empty input' do
          ptree = subject.parse('')
          check_empty_input_result(ptree)
        end

        it 'should cope with whitespaces only input' do
          ptree = subject.parse(' ' * 80 + "\n" * 20)
          check_empty_input_result(ptree)
        end

        it 'should cope with comments only input' do
          # +'' creates an unfrozen string (file uses frozen_string_literal)
          input = +''
          %w[First Second Third].each do |ordinal|
            input << "// #{ordinal} comment line\r\n"
          end
          ptree = subject.parse(input)
          check_empty_input_result(ptree)
        end
      end # context
    end # describe
55
+ end # module
56
+ end # module
@@ -0,0 +1,229 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../spec_helper' # Use the RSpec framework
4
+
5
+ # Load the class under test
6
+ require_relative '../../lib/loxxy/front_end/scanner'
7
+
8
+ module Loxxy
9
+ module FrontEnd
10
    describe Scanner do
      # Utility method for comparing actual and expected token
      # sequence. The final EOF is removed from the input sequence.
      def match_expectations(aScanner, theExpectations)
        tokens = aScanner.tokens
        eof_token = tokens.pop
        expect(eof_token.terminal).to eq('EOF')

        tokens.each_with_index do |token, i|
          terminal, lexeme = theExpectations[i]
          expect(token.terminal).to eq(terminal)
          expect(token.lexeme).to eq(lexeme)
        end
      end

      let(:sample_text) { 'print "Hello, world";' }
      subject { Scanner.new }

      context 'Initialization:' do
        it 'could be initialized with a text to tokenize or...' do
          expect { Scanner.new(sample_text) }.not_to raise_error
        end

        it 'could be initialized without argument...' do
          expect { Scanner.new }.not_to raise_error
        end

        it 'should have its scanner initialized' do
          expect(subject.scanner).to be_kind_of(StringScanner)
        end
      end # context

      context 'Input tokenization:' do
        it 'should recognize single special character token' do
          input = '(){},.-+;*/'
          subject.start_with(input)
          expectations = [
            # [token lexeme]
            %w[LEFT_PAREN (],
            %w[RIGHT_PAREN )],
            %w[LEFT_BRACE {],
            %w[RIGHT_BRACE }],
            %w[COMMA ,],
            %w[DOT .],
            %w[MINUS -],
            %w[PLUS +],
            %w[SEMICOLON ;],
            %w[STAR *],
            %w[SLASH /]
          ]
          match_expectations(subject, expectations)
        end

        it 'should recognize one or two special character tokens' do
          input = '! != = == > >= < <='
          subject.start_with(input)
          expectations = [
            # [token lexeme]
            %w[BANG !],
            %w[BANG_EQUAL !=],
            %w[EQUAL =],
            %w[EQUAL_EQUAL ==],
            %w[GREATER >],
            %w[GREATER_EQUAL >=],
            %w[LESS <],
            %w[LESS_EQUAL <=]
          ]
          match_expectations(subject, expectations)
        end

        it 'should recognize non-datatype keywords' do
          # Datatype keywords (false, nil, true) are covered by separate tests
          keywords =<<-LOX_END
            and class else fun for if or
            print return super this var while
          LOX_END
          subject.start_with(keywords)
          expectations = [
            # [token lexeme]
            %w[AND and],
            %w[CLASS class],
            %w[ELSE else],
            %w[FUN fun],
            %w[FOR for],
            %w[IF if],
            %w[OR or],
            %w[PRINT print],
            %w[RETURN return],
            %w[SUPER super],
            %w[THIS this],
            %w[VAR var],
            %w[WHILE while]
          ]
          match_expectations(subject, expectations)
        end

        it 'should recognize a false boolean token' do
          subject.start_with('false')
          token_false = subject.tokens[0]
          expect(token_false).to be_kind_of(Literal)
          expect(token_false.terminal).to eq('FALSE')
          expect(token_false.lexeme).to eq('false')
          expect(token_false.value).to be_kind_of(Datatype::False)
          expect(token_false.value.value).to be_falsy
        end

        it 'should recognize a true boolean token' do
          subject.start_with('true')
          token_true = subject.tokens[0]
          expect(token_true).to be_kind_of(Literal)
          expect(token_true.terminal).to eq('TRUE')
          expect(token_true.lexeme).to eq('true')
          expect(token_true.value).to be_kind_of(Datatype::True)
          expect(token_true.value.value).to be_truthy
        end

        it 'should recognize number values' do
          input = <<-LOX_END
            123 987654
            0 -0
            123.456 -0.001
          LOX_END

          # Note: a leading minus sign is part of the NUMBER lexeme
          # (e.g. '-0', '-0.001'), not a separate MINUS token.
          expectations = [
            ['123', 123],
            ['987654', 987654],
            ['0', 0],
            ['-0', 0],
            ['123.456', 123.456],
            ['-0.001', -0.001]
          ]

          subject.start_with(input)
          # [0..-2] drops the trailing EOF token
          subject.tokens[0..-2].each_with_index do |tok, i|
            expect(tok).to be_kind_of(Literal)
            expect(tok.terminal).to eq('NUMBER')
            (lexeme, val) = expectations[i]
            expect(tok.lexeme).to eq(lexeme)
            expect(tok.value).to be_kind_of(Datatype::Number)
            expect(tok.value.value).to eq(val)
          end
        end

        it 'should recognize leading and trailing dots as distinct tokens' do
          input = '.456 123.'

          subject.start_with(input)
          tokens = subject.tokens[0..-2]
          expect(tokens[0]).to be_kind_of(Rley::Lexical::Token)
          expect(tokens[0].terminal).to eq('DOT')
          expect(tokens[1]).to be_kind_of(Literal)
          expect(tokens[1].terminal).to eq('NUMBER')
          expect(tokens[1].value.value).to eq(456)
          expect(tokens[2]).to be_kind_of(Literal)
          expect(tokens[2].terminal).to eq('NUMBER')
          expect(tokens[2].value.value).to eq(123)
          expect(tokens[3]).to be_kind_of(Rley::Lexical::Token)
          expect(tokens[3].terminal).to eq('DOT')
        end

        it 'should recognize string values' do
          input =<<-LOX_END
            ""
            "string"
            "123"
          LOX_END

          # Expected values exclude the delimiting quotes
          expectations = [
            '',
            'string',
            '123'
          ]

          subject.start_with(input)
          subject.tokens[0..-2].each_with_index do |str, i|
            expect(str).to be_kind_of(Literal)
            expect(str.terminal).to eq('STRING')
            val = expectations[i]
            expect(str.value).to be_kind_of(Datatype::LXString)
            expect(str.value.value).to eq(val)
          end
        end

        it 'should recognize a nil token' do
          subject.start_with('nil')
          token_nil = subject.tokens[0]
          expect(token_nil).to be_kind_of(Literal)
          expect(token_nil.terminal).to eq('NIL')
          expect(token_nil.lexeme).to eq('nil')
          expect(token_nil.value).to be_kind_of(Datatype::Nil)
        end
      end # context

      context 'Handling comments:' do
        it 'should cope with one line comment only' do
          subject.start_with('// comment')

          # No token found, except eof marker
          eof_token = subject.tokens[0]
          expect(eof_token.terminal).to eq('EOF')
        end

        it 'should skip end of line comments' do
          input = <<-LOX_END
            // first comment
            print "ok"; // second comment
            // third comment
          LOX_END
          subject.start_with(input)
          # Note: the STRING lexeme keeps its delimiting quotes
          expectations = [
            # [token lexeme]
            %w[PRINT print],
            %w[STRING "ok"],
            %w[SEMICOLON ;]
          ]
          match_expectations(subject, expectations)
        end
      end # context
    end # describe
228
+ end # module
229
+ end # module