loxxy 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,225 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+ require 'rley'
5
+ require_relative '../datatype/all_datatypes'
6
+ require_relative 'literal'
7
+
8
+ module Loxxy
9
+ module FrontEnd
10
+ # A scanner (tokenizer) for the Lox language.
11
+ # Reference material:
12
+ # https://craftinginterpreters.com/the-lox-language.html
13
+ # Section 4.2.1 Token types
14
+ # Appendix A1.2 Lexical Grammar
15
+ # Responsibility: break input into a sequence of token objects.
16
+ # The tokenizer should recognize:
17
+ # Identifiers,
18
+ # Number literals including single digit
19
+ # String literals (quote delimited)
20
+ # Delimiters: e.g. parentheses '(', ')'
21
+ # Separators: e.g. comma
22
+ class Scanner
23
+ # @return [StringScanner] Low-level input scanner
24
+ attr_reader(:scanner)
25
+
26
+ # @return [Integer] The current line number
27
+ attr_reader(:lineno)
28
+
29
+ # @return [Integer] Position of last start of line in the input
30
+ attr_reader(:line_start)
31
+
32
+ # One or two special character tokens.
33
+ # These are enumerated in section 4.2.1 Token type
34
+ @@lexeme2name = {
35
+ '(' => 'LEFT_PAREN',
36
+ ')' => 'RIGHT_PAREN',
37
+ '{' => 'LEFT_BRACE',
38
+ '}' => 'RIGHT_BRACE',
39
+ ',' => 'COMMA',
40
+ '.' => 'DOT',
41
+ '-' => 'MINUS',
42
+ '+' => 'PLUS',
43
+ ';' => 'SEMICOLON',
44
+ '/' => 'SLASH',
45
+ '*' => 'STAR',
46
+ '!' => 'BANG',
47
+ '!=' => 'BANG_EQUAL',
48
+ '=' => 'EQUAL',
49
+ '==' => 'EQUAL_EQUAL',
50
+ '>' => 'GREATER',
51
+ '>=' => 'GREATER_EQUAL',
52
+ '<' => 'LESS',
53
+ '<=' => 'LESS_EQUAL',
54
+ }.freeze
55
+
56
+ # Here are all the implemented Lox keywords (in uppercase)
57
+ # These are enumerated in section 4.2.1 Token type
58
+ @@keywords = %w[
59
+ AND CLASS ELSE FALSE FUN FOR IF NIL OR
60
+ PRINT RETURN SUPER THIS TRUE VAR WHILE
61
+ ].map { |x| [x, x] }.to_h
62
+
63
+ class ScanError < StandardError; end
64
+
65
+ # Constructor. Initialize a tokenizer for Lox input.
66
+ # @param source [String] Lox text to tokenize.
67
+ def initialize(source = nil)
68
+ @scanner = StringScanner.new('')
69
+ start_with(source) if source
70
+ end
71
+
72
+ # Reset the tokenizer and make the given text, the current input.
73
+ # @param source [String] Lox text to tokenize.
74
+ def start_with(source)
75
+ @scanner.string = source
76
+ @lineno = 1
77
+ @line_start = 0
78
+ end
79
+
80
+ # Scan the source and return an array of tokens.
81
+ # @return [Array<Rley::Lexical::Token>] | Returns a sequence of tokens
82
+ def tokens
83
+ tok_sequence = []
84
+ until @scanner.eos?
85
+ token = _next_token
86
+ tok_sequence << token unless token.nil?
87
+ end
88
+ tok_sequence << build_token('EOF', '')
89
+
90
+ return tok_sequence
91
+ end
92
+
93
+ private
94
+
95
+ def _next_token
96
+ skip_intertoken_spaces
97
+ curr_ch = scanner.peek(1)
98
+ return nil if curr_ch.nil? || curr_ch.empty?
99
+
100
+ token = nil
101
+
102
+ if "(){},.;/*".include? curr_ch
103
+ # Single delimiter or separator character
104
+ token = build_token(@@lexeme2name[curr_ch], scanner.getch)
105
+ elsif (lexeme = scanner.scan(/[+\-](?!\d)/))
106
+ # Minus or plus character not preceding a digit
107
+ token = build_token(@@lexeme2name[lexeme], lexeme)
108
+ elsif (lexeme = scanner.scan(/[!=><]=?/))
109
+ # One or two special character tokens
110
+ token = build_token(@@lexeme2name[lexeme], lexeme)
111
+ elsif (lexeme = scanner.scan(/-?\d+(?:\.\d+)?/))
112
+ token = build_token('NUMBER', lexeme)
113
+ elsif (lexeme = scanner.scan(/"(?:\\"|[^"])*"/))
114
+ token = build_token('STRING', lexeme)
115
+ elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*/))
116
+ keyw = @@keywords[lexeme.upcase]
117
+ tok_type = keyw || 'IDENTIFIER'
118
+ token = build_token(tok_type, lexeme)
119
+ else # Unknown token
120
+ erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
121
+ sequel = scanner.scan(/.{1,20}/)
122
+ erroneous += sequel unless sequel.nil?
123
+ raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
124
+ end
125
+
126
+ return token
127
+ end
128
+
129
+ def build_token(aSymbolName, aLexeme)
130
+ begin
131
+ (value, symb) = convert_to(aLexeme, aSymbolName)
132
+ col = scanner.pos - aLexeme.size - @line_start + 1
133
+ pos = Rley::Lexical::Position.new(@lineno, col)
134
+ if value
135
+ token = Literal.new(value, aLexeme.dup, symb, pos)
136
+ else
137
+ token = Rley::Lexical::Token.new(aLexeme.dup, symb, pos)
138
+ end
139
+ rescue StandardError => e
140
+ puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
141
+ raise e
142
+ end
143
+
144
+ return token
145
+ end
146
+
147
+ def convert_to(aLexeme, aSymbolName)
148
+ symb = aSymbolName
149
+ case aSymbolName
150
+ when 'FALSE'
151
+ value = Datatype::False.instance
152
+ when 'NIL'
153
+ value = Datatype::Nil.instance
154
+ when 'NUMBER'
155
+ value = Datatype::Number.new(aLexeme)
156
+ when 'STRING'
157
+ value = Datatype::LXString.new(aLexeme)
158
+ when 'TRUE'
159
+ value = Datatype::True.instance
160
+ else
161
+ value = nil
162
+ end
163
+
164
+ return [value, symb]
165
+ end
166
+
167
+ # Skip non-significant whitespaces and comments.
168
+ # Advance the scanner until something significant is found.
169
+ def skip_intertoken_spaces
170
+ pre_pos = scanner.pos
171
+
172
+ loop do
173
+ ws_found = scanner.skip(/[ \t\f]+/) ? true : false
174
+ nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
175
+ if nl_found
176
+ ws_found = true
177
+ next_line
178
+ end
179
+ cmt_found = false
180
+ if scanner.scan(/\/(\/|\*)/)
181
+ cmt_found = true
182
+ case scanner.matched
183
+ when '//'
184
+ scanner.skip(/[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
185
+ next_line
186
+ when '/*'
187
+ skip_block_comment
188
+ next
189
+ end
190
+ end
191
+ break unless ws_found || cmt_found
192
+ end
193
+
194
+ curr_pos = scanner.pos
195
+ end
196
+
197
+ def skip_block_comment
198
+ nesting_level = 1
199
+ loop do
200
+ comment_part = scanner.scan_until(/(?:\/\*)|(?:\*\/)|(?:(?:\r\n)|\r|\n)/)
201
+ unless comment_part
202
+ msg = "Unterminated '/* ... */' block comment on line #{lineno}"
203
+ raise ScanError, msg
204
+ end
205
+
206
+ case scanner.matched
207
+ when /(?:(?:\r\n)|\r|\n)/
208
+ next_line
209
+ when '*/'
210
+ nesting_level -= 1
211
+ break if nesting_level.zero?
212
+ when '/*'
213
+ nesting_level += 1
214
+ end
215
+ end
216
+ end
217
+
218
+ def next_line
219
+ @lineno += 1
220
+ @line_start = scanner.pos
221
+ end
222
+ end # class
223
+ end # module
224
+ end # module
225
+ # End of file
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Loxxy
  # The version number of the loxxy gem.
  VERSION = '0.0.3'
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path('lib', __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'loxxy/version'
6
+
7
+ # Implementation module
8
module PkgExtending
  # Populate the file manifest of the given package specification.
  # @param aPackage [Gem::Specification] the spec to extend
  def self.pkg_files(aPackage)
    patterns = [
      '.rubocop.yml',
      '.rspec',
      '.travis.yml',
      '.yardopts',
      'Gemfile',
      'Rakefile',
      'CHANGELOG.md',
      'CODE_OF_CONDUCT.md',
      'LICENSE.txt',
      'README.md',
      'loxxy.gemspec',
      'bin/*.rb',
      'lib/*.*',
      'lib/**/*.rb',
      'spec/**/*.rb'
    ]
    aPackage.files = Dir.glob(patterns)
    aPackage.test_files = Dir.glob('spec/**/*_spec.rb')
    aPackage.require_path = 'lib'
  end

  # Configure RDoc generation options for the given package specification.
  # @param aPackage [Gem::Specification] the spec to extend
  def self.pkg_documentation(aPackage)
    aPackage.extra_rdoc_files = ['README.md']
    aPackage.rdoc_options << '--charset=UTF-8 --exclude="examples|spec"'
  end
end # module
37
+
38
Gem::Specification.new do |spec|
  spec.name = 'loxxy'
  spec.version = Loxxy::VERSION
  spec.authors = ['Dimitri Geshef']
  spec.email = ['famished.tiger@yahoo.com']
  spec.summary = 'An implementation of the Lox programming language. WIP'
  spec.description = 'An implementation of the Lox programming language. WIP'
  spec.homepage = 'https://github.com/famished-tiger/loxxy'
  spec.license = 'MIT'
  spec.required_ruby_version = '~> 2.4'

  # Populate spec.files BEFORE deriving the executables from it;
  # otherwise the grep below would run against the empty default file list.
  PkgExtending.pkg_files(spec)
  PkgExtending.pkg_documentation(spec)

  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'rley', '~> 0.7.06'

  # Development dependencies
  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../spec_helper' # Use the RSpec framework
4
+ require_relative '../../lib/loxxy/front_end/parser' # Load the class under test
5
+
6
+ module Loxxy
7
+ module FrontEnd
8
    describe Parser do
      subject { Parser.new }

      context 'Initialization:' do
        it 'should be initialized without argument' do
          expect { Parser.new }.not_to raise_error
        end

        it 'should have its parse engine initialized' do
          expect(subject.engine).to be_kind_of(Rley::Engine)
        end
      end # context

      context 'Parsing blank files:' do
        # Shared assertion: a blank input must still produce a tree that
        # complies with the grammar rule:
        #   program => declaration_star EOF
        # where the declaration_star MUST be empty
        def check_empty_input_result(aParseTree)
          expect(aParseTree.root.symbol.name).to eq('program')
          (decls, eof) = aParseTree.root.subnodes
          expect(decls).to be_kind_of(Rley::PTree::NonTerminalNode)
          expect(decls.symbol.name).to eq('declaration_star')
          expect(decls.subnodes).to be_empty
          expect(eof).to be_kind_of(Rley::PTree::TerminalNode)
          expect(eof.symbol.name).to eq('EOF')
        end

        it 'should cope with an empty input' do
          ptree = subject.parse('')
          check_empty_input_result(ptree)
        end

        it 'should cope with whitespaces only input' do
          ptree = subject.parse(' ' * 80 + "\n" * 20)
          check_empty_input_result(ptree)
        end

        it 'should cope with comments only input' do
          # +'' creates an unfrozen string (file uses frozen_string_literal)
          input = +''
          %w[First Second Third].each do |ordinal|
            input << "// #{ordinal} comment line\r\n"
          end
          ptree = subject.parse(input)
          check_empty_input_result(ptree)
        end
      end # context
    end # describe
55
+ end # module
56
+ end # module
@@ -0,0 +1,229 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../spec_helper' # Use the RSpec framework
4
+
5
+ # Load the class under test
6
+ require_relative '../../lib/loxxy/front_end/scanner'
7
+
8
+ module Loxxy
9
+ module FrontEnd
10
    describe Scanner do
      # Utility method for comparing actual and expected token
      # sequence. The final EOF is removed from the input sequence.
      def match_expectations(aScanner, theExpectations)
        tokens = aScanner.tokens
        eof_token = tokens.pop
        expect(eof_token.terminal).to eq('EOF')

        tokens.each_with_index do |token, i|
          terminal, lexeme = theExpectations[i]
          expect(token.terminal).to eq(terminal)
          expect(token.lexeme).to eq(lexeme)
        end
      end

      let(:sample_text) { 'print "Hello, world";' }
      subject { Scanner.new }

      context 'Initialization:' do
        it 'could be initialized with a text to tokenize or...' do
          expect { Scanner.new(sample_text) }.not_to raise_error
        end

        it 'could be initialized without argument...' do
          expect { Scanner.new }.not_to raise_error
        end

        it 'should have its scanner initialized' do
          expect(subject.scanner).to be_kind_of(StringScanner)
        end
      end # context

      context 'Input tokenization:' do
        it 'should recognize single special character token' do
          input = '(){},.-+;*/'
          subject.start_with(input)
          expectations = [
            # [token lexeme]
            %w[LEFT_PAREN (],
            %w[RIGHT_PAREN )],
            %w[LEFT_BRACE {],
            %w[RIGHT_BRACE }],
            %w[COMMA ,],
            %w[DOT .],
            %w[MINUS -],
            %w[PLUS +],
            %w[SEMICOLON ;],
            %w[STAR *],
            %w[SLASH /]
          ]
          match_expectations(subject, expectations)
        end

        it 'should recognize one or two special character tokens' do
          input = '! != = == > >= < <='
          subject.start_with(input)
          expectations = [
            # [token lexeme]
            %w[BANG !],
            %w[BANG_EQUAL !=],
            %w[EQUAL =],
            %w[EQUAL_EQUAL ==],
            %w[GREATER >],
            %w[GREATER_EQUAL >=],
            %w[LESS <],
            %w[LESS_EQUAL <=]
          ]
          match_expectations(subject, expectations)
        end

        it 'should recognize non-datatype keywords' do
          # Datatype keywords (false, nil, true) are covered by separate tests
          keywords =<<-LOX_END
            and class else fun for if or
            print return super this var while
          LOX_END
          subject.start_with(keywords)
          expectations = [
            # [token lexeme]
            %w[AND and],
            %w[CLASS class],
            %w[ELSE else],
            %w[FUN fun],
            %w[FOR for],
            %w[IF if],
            %w[OR or],
            %w[PRINT print],
            %w[RETURN return],
            %w[SUPER super],
            %w[THIS this],
            %w[VAR var],
            %w[WHILE while]
          ]
          match_expectations(subject, expectations)
        end

        it 'should recognize a false boolean token' do
          subject.start_with('false')
          token_false = subject.tokens[0]
          expect(token_false).to be_kind_of(Literal)
          expect(token_false.terminal).to eq('FALSE')
          expect(token_false.lexeme).to eq('false')
          expect(token_false.value).to be_kind_of(Datatype::False)
          expect(token_false.value.value).to be_falsy
        end

        it 'should recognize a true boolean token' do
          subject.start_with('true')
          token_true = subject.tokens[0]
          expect(token_true).to be_kind_of(Literal)
          expect(token_true.terminal).to eq('TRUE')
          expect(token_true.lexeme).to eq('true')
          expect(token_true.value).to be_kind_of(Datatype::True)
          expect(token_true.value.value).to be_truthy
        end

        it 'should recognize number values' do
          input = <<-LOX_END
            123 987654
            0 -0
            123.456 -0.001
          LOX_END

          # Note: a leading minus sign is part of the NUMBER lexeme
          # (e.g. '-0', '-0.001'), not a separate MINUS token.
          expectations = [
            ['123', 123],
            ['987654', 987654],
            ['0', 0],
            ['-0', 0],
            ['123.456', 123.456],
            ['-0.001', -0.001]
          ]

          subject.start_with(input)
          # [0..-2] drops the trailing EOF token
          subject.tokens[0..-2].each_with_index do |tok, i|
            expect(tok).to be_kind_of(Literal)
            expect(tok.terminal).to eq('NUMBER')
            (lexeme, val) = expectations[i]
            expect(tok.lexeme).to eq(lexeme)
            expect(tok.value).to be_kind_of(Datatype::Number)
            expect(tok.value.value).to eq(val)
          end
        end

        it 'should recognize leading and trailing dots as distinct tokens' do
          input = '.456 123.'

          subject.start_with(input)
          tokens = subject.tokens[0..-2]
          expect(tokens[0]).to be_kind_of(Rley::Lexical::Token)
          expect(tokens[0].terminal).to eq('DOT')
          expect(tokens[1]).to be_kind_of(Literal)
          expect(tokens[1].terminal).to eq('NUMBER')
          expect(tokens[1].value.value).to eq(456)
          expect(tokens[2]).to be_kind_of(Literal)
          expect(tokens[2].terminal).to eq('NUMBER')
          expect(tokens[2].value.value).to eq(123)
          expect(tokens[3]).to be_kind_of(Rley::Lexical::Token)
          expect(tokens[3].terminal).to eq('DOT')
        end

        it 'should recognize string values' do
          input =<<-LOX_END
            ""
            "string"
            "123"
          LOX_END

          # Expected values exclude the delimiting quotes
          expectations = [
            '',
            'string',
            '123'
          ]

          subject.start_with(input)
          subject.tokens[0..-2].each_with_index do |str, i|
            expect(str).to be_kind_of(Literal)
            expect(str.terminal).to eq('STRING')
            val = expectations[i]
            expect(str.value).to be_kind_of(Datatype::LXString)
            expect(str.value.value).to eq(val)
          end
        end

        it 'should recognize a nil token' do
          subject.start_with('nil')
          token_nil = subject.tokens[0]
          expect(token_nil).to be_kind_of(Literal)
          expect(token_nil.terminal).to eq('NIL')
          expect(token_nil.lexeme).to eq('nil')
          expect(token_nil.value).to be_kind_of(Datatype::Nil)
        end
      end # context

      context 'Handling comments:' do
        it 'should cope with one line comment only' do
          subject.start_with('// comment')

          # No token found, except eof marker
          eof_token = subject.tokens[0]
          expect(eof_token.terminal).to eq('EOF')
        end

        it 'should skip end of line comments' do
          input = <<-LOX_END
            // first comment
            print "ok"; // second comment
            // third comment
          LOX_END
          subject.start_with(input)
          # Note: the STRING lexeme keeps its delimiting quotes
          expectations = [
            # [token lexeme]
            %w[PRINT print],
            %w[STRING "ok"],
            %w[SEMICOLON ;]
          ]
          match_expectations(subject, expectations)
        end
      end # context
    end # describe
228
+ end # module
229
+ end # module