lex 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/Rakefile +8 -0
- data/lex.gemspec +22 -0
- data/lib/lex.rb +22 -0
- data/lib/lex/lexeme.rb +27 -0
- data/lib/lex/lexer.rb +210 -0
- data/lib/lex/lexer/dsl.rb +49 -0
- data/lib/lex/lexer/rule_dsl.rb +165 -0
- data/lib/lex/lexers.rb +11 -0
- data/lib/lex/lexers/html.rb +8 -0
- data/lib/lex/linter.rb +114 -0
- data/lib/lex/logger.rb +21 -0
- data/lib/lex/source_line.rb +13 -0
- data/lib/lex/state.rb +37 -0
- data/lib/lex/token.rb +47 -0
- data/lib/lex/version.rb +5 -0
- data/spec/spec_helper.rb +50 -0
- data/spec/unit/error_spec.rb +42 -0
- data/spec/unit/keyword_spec.rb +34 -0
- data/spec/unit/lex_spec.rb +60 -0
- data/spec/unit/position_spec.rb +94 -0
- data/spec/unit/rule_spec.rb +63 -0
- data/spec/unit/state/clone_spec.rb +15 -0
- data/spec/unit/states_spec.rb +194 -0
- data/spec/unit/tokens_spec.rb +32 -0
- data/tasks/console.rake +10 -0
- data/tasks/coverage.rake +11 -0
- data/tasks/spec.rake +29 -0
- metadata +104 -0
data/lib/lex/lexers.rb
ADDED
@@ -0,0 +1,11 @@
# coding: utf-8

require 'lex'

# Lexer implementations
#
# @note This file is not normally available. You must require
#   `lex/lexers` to load it.

# Put the bundled lexer implementations on the load path exactly once.
lexers_dir = ::File.expand_path('../lexers', __FILE__)
$LOAD_PATH.unshift(lexers_dir) unless $LOAD_PATH.include?(lexers_dir)
data/lib/lex/linter.rb
ADDED
@@ -0,0 +1,114 @@
# coding: utf-8

module Lex
  # A class responsible for checking lexer definitions
  #
  # @api public
  class Linter
    # Valid token-name pattern. Anchored with \A/\z rather than ^/$ so
    # that names containing embedded newlines cannot pass the check.
    IDENTIFIER_RE = /\A[a-zA-Z0-9]+\z/.freeze

    # Failure raised by +complain+
    Failure = Class.new(StandardError)

    # Run linting of lexer
    #
    # @param [Lex::Lexer] lexer
    #   the lexer definition to validate
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api public
    def lint(lexer)
      validate_tokens(lexer)
      validate_states(lexer)
      validate_rules(lexer)
    end

    private

    # Check if token has valid name
    #
    # @param [Symbol,String] value
    #   token to check
    #
    # @return [Boolean]
    #
    # @api private
    def identifier?(value)
      # Coerce to String first: Symbols have no #=~, so `value =~ re`
      # fell through to Object#=~ and always returned nil, wrongly
      # rejecting every Symbol token name.
      !!(value.to_s =~ IDENTIFIER_RE)
    end

    # Validate provided tokens
    #
    # @api private
    def validate_tokens(lexer)
      # Check the collection contract before calling #empty? — on a
      # non-collection #empty? would raise NoMethodError before the
      # friendly complaint could be produced.
      if !lexer.lex_tokens.respond_to?(:to_ary)
        complain("Tokens must be a list or enumerable")
      end
      if lexer.lex_tokens.empty?
        complain("No token list defined")
      end

      terminals = []
      lexer.lex_tokens.each do |token|
        if !identifier?(token)
          complain("Bad token name `#{token}`")
        end
        if terminals.include?(token)
          complain("Token `#{token}` already defined")
        end
        terminals << token
      end
    end

    # Validate provided state names
    #
    # @api private
    def validate_states(lexer)
      if !lexer.state_info.respond_to?(:each_pair)
        complain("States must be defined as a hash")
      end

      lexer.state_info.each do |state_name, state_type|
        if ![:inclusive, :exclusive].include?(state_type)
          complain("State type for state #{state_name}" \
                   " must be :inclusive or :exclusive")
        end

        # Exclusive states ignore the default rules, so missing error
        # or ignore handlers deserve a warning (not a hard failure).
        if state_type == :exclusive
          if !lexer.state_error.key?(state_name)
            lexer.logger.warn("No error rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
          if !lexer.state_ignore.key?(state_name)
            lexer.logger.warn("No ignore rule is defined " \
                              "for exclusive state '#{state_name}'")
          end
        end
      end
    end

    # Validate rules
    #
    # @api private
    def validate_rules(lexer)
      if lexer.state_re.empty?
        complain("No rules of the form rule(name, pattern) are defined")
      end

      lexer.state_info.each do |state_name, _state_type|
        if !lexer.state_re.key?(state_name.to_sym)
          complain("No rules defined for state '#{state_name}'")
        end
      end
    end

    # Raise a failure if validation of a lexer fails
    #
    # @raise [Lex::Linter::Failure]
    #
    # @api private
    def complain(*args)
      raise Failure, *args
    end
  end # Linter
end # Lex
data/lib/lex/logger.rb
ADDED
@@ -0,0 +1,21 @@
# coding: utf-8

module Lex
  # Thin wrapper around a logging backend used by lexer components.
  class Logger
    # @param [::Logger] logger
    #   an injected logging backend; defaults to a ::Logger on STDERR
    def initialize(logger = nil)
      # Honour the injected backend. Previously the argument was
      # silently ignored and a STDERR logger was always created.
      @logger = logger || ::Logger.new(STDERR)
    end

    # Log an informational message
    def info(message)
      @logger.info(message)
    end

    # Log an error message
    def error(message)
      @logger.error(message)
    end

    # Log a warning message
    def warn(message)
      @logger.warn(message)
    end
  end # Logger
end # Lex
data/lib/lex/state.rb
ADDED
@@ -0,0 +1,37 @@
# coding: utf-8

module Lex
  # A named, enumerable collection of lexemes for a single lexer state.
  class State
    include Enumerable

    attr_reader :name, :lexemes

    # @param [Symbol] name
    #   the state's name
    # @param [Array] lexemes
    #   initial lexemes held by this state
    def initialize(name, lexemes = [])
      @name    = name
      @lexemes = lexemes
    end

    # Yield each lexeme in turn (Enumerable contract).
    def each(&block)
      lexemes.each(&block)
    end

    # Append a lexeme unconditionally.
    def <<(lexeme)
      lexemes << lexeme
    end

    # Merge in lexemes, skipping any already present. Checking against
    # the growing list also dedupes repeats inside +values+ itself.
    def update(values)
      values.each do |lexeme|
        self << lexeme unless lexemes.include?(lexeme)
      end
    end

    # Two states are equal when both name and lexemes match.
    def ==(other)
      name == other.name && lexemes == other.lexemes
    end

    # Copy with a fresh lexeme list whose members are themselves cloned.
    def clone
      self.class.new(name, lexemes.map(&:clone))
    end
  end # State
end # Lex
data/lib/lex/token.rb
ADDED
@@ -0,0 +1,47 @@
# coding: utf-8

require 'forwardable'

module Lex
  # Used to represent the tokens produced
  class Token
    extend Forwardable

    attr_accessor :name, :value

    attr_reader :action

    # Position accessors are forwarded to the backing SourceLine.
    def_delegators :@source_line, :line, :column

    # @param [Symbol] name
    # @param [String] value
    # @param [Proc] action
    #   optional block run when the token is matched
    def initialize(name, value, &action)
      @name        = name
      @value       = value
      @action      = action
      @source_line = SourceLine.new
    end

    # Record the position at which this token was matched.
    def update_line(line, column)
      @source_line.line   = line
      @source_line.column = column
    end

    # Return this token as array of values
    #
    # @return [Symbol, String, Integer, Integer]
    #
    # @api public
    def to_ary
      [name, value, line, column]
    end

    # Return a string representation
    #
    # @return String
    #
    # @api public
    def to_s
      format('Lex::Token(%s)', to_ary.join(','))
    end
    alias_method :inspect, :to_s
  end # Token
end # Lex
data/lib/lex/version.rb
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,50 @@
# coding: utf-8

# Enable coverage reporting only when explicitly requested (COVERAGE)
# or when running on CI (TRAVIS), and only on Rubies new enough for
# SimpleCov.
if RUBY_VERSION > '1.9' and (ENV['COVERAGE'] || ENV['TRAVIS'])
  require 'simplecov'
  require 'coveralls'

  # Report both locally (HTML) and to the Coveralls service.
  SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter[
    SimpleCov::Formatter::HTMLFormatter,
    Coveralls::SimpleCov::Formatter
  ]

  SimpleCov.start do
    command_name 'spec'
    # Exclude the specs themselves from coverage figures.
    add_filter 'spec'
  end
end

require 'lex'

RSpec.configure do |config|
  config.expect_with :rspec do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  config.mock_with :rspec do |mocks|
    # Fail fast when a stubbed method does not exist on the real object.
    mocks.verify_partial_doubles = true
  end

  # Limits the available syntax to the non-monkey patched syntax that is recommended.
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Use the verbose "doc" formatter when a single spec file is run.
  if config.files_to_run.one?
    config.default_formatter = 'doc'
  end

  # Print the two slowest examples to help spot slow specs.
  config.profile_examples = 2

  # Randomize ordering to surface order dependencies; the seed printed
  # by RSpec can be passed back via --seed to reproduce a given order.
  config.order = :random

  Kernel.srand config.seed
end

# Strip the common leading indentation from a heredoc-style string.
def unindent(string)
  prefix = string.scan(/^[ \t]+(?=\S)/).min
  string.gsub(/^#{prefix}/, '').chomp
end
@@ -0,0 +1,42 @@
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#error' do

  it "registers error handler" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      # The handler receives each unmatched character as an :error
      # token; returning it keeps the token in the output stream.
      error do |lexer, token|
        token
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # `(` and `)` match no rule, so they surface as :error tokens.
    expect(my_lexer.lex("a(b)a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:error, '(', 1, 2],
      [:IDENTIFIER, 'b', 1, 3],
      [:error, ')', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  it "raises error without error handler" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:IDENTIFIER)

      rule(:IDENTIFIER, /a|b/)

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Without an error handler, any unmatched character is fatal.
    expect {
      my_lexer.lex("a(b)a").to_a
    }.to raise_error(Lex::LexerError, /Illegal character `\(`/)
  end
end
@@ -0,0 +1,34 @@
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'keywords' do
  it "allows to easily create keyword tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      # Map of reserved words to their dedicated token names.
      def self.keywords
        {
          if: :IF,
          then: :THEN,
          else: :ELSE,
          while: :WHILE
        }
      end

      tokens(:IDENTIFIER, *keywords.values)

      # Identifiers that collide with a keyword are renamed to the
      # keyword token; everything else stays :IDENTIFIER.
      rule(:IDENTIFIER, /\w[\w\d]*/) do |lexer, token|
        token.name = lexer.class.keywords.fetch(token.value.to_sym, :IDENTIFIER)
        token
      end

      ignore(' ')
    end)
    my_lexer = MyLexer.new

    expect(my_lexer.lex("if then else").map(&:to_ary)).to eq([
      [:IF, 'if', 1, 1],
      [:THEN, 'then', 1, 4],
      [:ELSE, 'else', 1, 9]
    ])
  end
end
@@ -0,0 +1,60 @@
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'lex' do

  it "tokenizes simple input" do
    code = unindent(<<-EOS)
      x = 5 + 44 * (s - t)
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :NUMBER,
        :PLUS,
        :MINUS,
        :TIMES,
        :DIVIDE,
        :LPAREN,
        :RPAREN,
        :EQUALS,
        :IDENTIFIER
      )

      rule(:PLUS, /\+/)
      rule(:MINUS, /\-/)
      rule(:TIMES, /\*/)
      rule(:DIVIDE, /\//)
      rule(:LPAREN, /\(/)
      rule(:RPAREN, /\)/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Convert the matched digit run into an Integer value.
      rule(:NUMBER, /[0-9]+/) do |lexer, token|
        token.value = token.value.to_i
        token
      end

      # Advance the line counter so positions stay accurate; returning
      # nil (no token) discards the newline from the output.
      rule(:newline, /\n+/) do |lexer, token|
        lexer.advance_line(token.value.length)
      end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Each tuple is [name, value, line, column].
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 5, 1, 5],
      [:PLUS, '+', 1, 7],
      [:NUMBER, 44, 1, 9],
      [:TIMES, '*', 1, 12],
      [:LPAREN, '(', 1, 14],
      [:IDENTIFIER, 's', 1, 15],
      [:MINUS, '-', 1, 17],
      [:IDENTIFIER, 't', 1, 19],
      [:RPAREN, ')', 1, 20]
    ])
  end
end