lex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.rspec +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +22 -0
- data/Gemfile +19 -0
- data/LICENSE.txt +22 -0
- data/README.md +423 -0
- data/Rakefile +8 -0
- data/lex.gemspec +22 -0
- data/lib/lex.rb +22 -0
- data/lib/lex/lexeme.rb +27 -0
- data/lib/lex/lexer.rb +210 -0
- data/lib/lex/lexer/dsl.rb +49 -0
- data/lib/lex/lexer/rule_dsl.rb +165 -0
- data/lib/lex/lexers.rb +11 -0
- data/lib/lex/lexers/html.rb +8 -0
- data/lib/lex/linter.rb +114 -0
- data/lib/lex/logger.rb +21 -0
- data/lib/lex/source_line.rb +13 -0
- data/lib/lex/state.rb +37 -0
- data/lib/lex/token.rb +47 -0
- data/lib/lex/version.rb +5 -0
- data/spec/spec_helper.rb +50 -0
- data/spec/unit/error_spec.rb +42 -0
- data/spec/unit/keyword_spec.rb +34 -0
- data/spec/unit/lex_spec.rb +60 -0
- data/spec/unit/position_spec.rb +94 -0
- data/spec/unit/rule_spec.rb +63 -0
- data/spec/unit/state/clone_spec.rb +15 -0
- data/spec/unit/states_spec.rb +194 -0
- data/spec/unit/tokens_spec.rb +32 -0
- data/tasks/console.rake +10 -0
- data/tasks/coverage.rake +11 -0
- data/tasks/spec.rake +29 -0
- metadata +104 -0
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, 'position' do
  it "calculates line number and position info from input" do
    code = unindent(<<-EOS)
      x = 1
      y = 2
      s = x + y
    EOS

    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:NUMBER, :PLUS, :IDENTIFIER, :EQUALS)

      rule(:PLUS, /\+/)
      rule(:EQUALS, /=/)
      rule(:IDENTIFIER, /\A[_\$a-zA-Z][_\$0-9a-zA-Z]*/)

      # Coerce the matched digits to an Integer before emitting the token.
      rule(:NUMBER, /[0-9]+/) do |_lexer, number_token|
        number_token.value = number_token.value.to_i
        number_token
      end

      ignore " \t"

      # Advance the line counter on newlines; returning nil emits no token.
      rule(:newline, /\n+/) do |lexer, newline_token|
        lexer.advance_line(newline_token.value.length)
      end
    end)

    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:IDENTIFIER, 'x', 1, 1],
      [:EQUALS, '=', 1, 3],
      [:NUMBER, 1, 1, 5],
      [:IDENTIFIER, 'y', 2, 1],
      [:EQUALS, '=', 2, 3],
      [:NUMBER, 2, 2, 5],
      [:IDENTIFIER, 's', 3, 1],
      [:EQUALS, '=', 3, 3],
      [:IDENTIFIER, 'x', 3, 5],
      [:PLUS, '+', 3, 7],
      [:IDENTIFIER, 'y', 3, 9]
    ])
  end

  it "correctly tracks multiline content" do
    code = unindent(<<-EOS)
      This is
      <b>webpage!</b>
    EOS
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(:WORD, :LANGLE, :RANGLE, :LANGLESLASH)

      rule(:WORD, /[^ <>\n]+/)
      rule(:LANGLE, /</)
      rule(:RANGLE, />/)
      rule(:LANGLESLASH, /<\//)

      # Keep line/column bookkeeping in sync across line breaks.
      rule(:newline, /\n/) do |lexer, newline_token|
        lexer.advance_line(newline_token.value.size)
      end

      ignore " "

      # Swallow unmatched input silently for this example.
      error do |_lexer, _token|
      end
    end)

    my_lexer = MyLexer.new
    expect(my_lexer.lex(code).map(&:to_ary)).to eq([
      [:WORD, 'This', 1, 1],
      [:WORD, 'is', 1, 6],
      [:LANGLE, '<', 2, 4],
      [:WORD, 'b', 2, 5],
      [:RANGLE, '>', 2, 6],
      [:WORD, 'webpage!', 2, 7],
      [:LANGLESLASH, '</', 2, 15],
      [:WORD, 'b', 2, 17],
      [:RANGLE, '>', 2, 18]
    ])
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#rule' do

  it "raises error with no rules" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:ID)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No rules of the form/)
  end

  it "skips rule that has action but doesn't return token" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :IDENTIFIER,
        :LBRACE,
        :RBRACE
      )

      rule(:IDENTIFIER, /a|b/)

      # Action returns nil, so the matched '{' is dropped from the output.
      rule(:LBRACE, /{/) do |_lexer, _token|
      end

      # Action returns the token, so '}' is emitted normally.
      rule(:RBRACE, /}/) do |_lexer, token|
        token
      end
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("a{b}a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:IDENTIFIER, 'b', 1, 3],
      [:RBRACE, '}', 1, 4],
      [:IDENTIFIER, 'a', 1, 5]
    ])
  end

  # Fixed typo in description: "uniquness" -> "uniqueness".
  it "validates uniqueness" do
    expect {
      Class.new(Lex::Lexer) do
        tokens( :WORD )

        rule(:WORD, /\w+/)

        rule(:WORD, /\w+/)
      end
    }.to raise_error(Lex::LexerError, /Rule 'WORD' redefined./)
  end

  it "throws error if using token in rule without prior specifying" do
    expect {
      Class.new(Lex::Lexer) do
        tokens(:ID)

        rule(:UNKNOWN, /a/)
      end
    }.to raise_error(Lex::LexerError, /Rule 'UNKNOWN' defined for an unspecified token UNKNOWN/)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::State, '.clone' do
  it "clones state instance" do
    lexeme = double(:lexeme)
    state = Lex::State.new(:initial, [lexeme, lexeme])
    copy = state.clone

    # A clone is a distinct object with its own copy of the lexeme list.
    expect(copy).not_to eql(state)
    expect(copy.lexemes).not_to eql(state.lexemes)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#states' do

  it "checks states" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:IDENTIFIER)

        states(foo: :unknown)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /State type for state foo must be/)
  end

  it "lexes ignoring :exclusive state tokens" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens(
        :IDENTIFIER,
        :LBRACE,
        :RBRACE
      )
      states( brace: :exclusive )

      rule(:IDENTIFIER, /a|b/)

      # Entering '{' switches the lexer into the exclusive :brace state.
      rule(:LBRACE, /{/) do |lexer, token|
        lexer.push_state(:brace)
        token
      end

      # State-prefixed rule: only active while in the :brace state.
      rule(:brace_RBRACE, /}/) do |lexer, token|
        lexer.pop_state
        token
      end

      # Silently drop anything unmatched inside :brace (e.g. 'bb').
      error(:brace) do |_lexer, _token|
      end

      ignore(:brace, " \t")
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("a{bb}a").map(&:to_ary)).to eq([
      [:IDENTIFIER, 'a', 1, 1],
      [:LBRACE, '{', 1, 2],
      [:RBRACE, '}', 1, 5],
      [:IDENTIFIER, 'a', 1, 6]
    ])
  end

  it "lexes in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, _token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, _token|
        lexer.pop_state
      end

      error(:htmlcomment) do |_lexer, _token|
      end

      ignore(:htmlcomment, " \t")

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # The comment body is dropped entirely because :htmlcomment is exclusive.
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "warns about lack of error condition in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment_WORD, /\w+/)

      ignore " "
    end)
    expect {
      MyLexer.new
    }.to output(/No error rule is defined for exclusive state 'htmlcomment'/).
      to_stderr_from_any_process
  end

  # Fixed description: the lexer under test declares an :exclusive state
  # and the assertion checks the "exclusive state" warning message.
  it "warns about lack of ignore condition in :exclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :exclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment_WORD, /\w+/)

      error(:htmlcomment)
    end)
    expect {
      MyLexer.new
    }.to output(/No ignore rule is defined for exclusive state 'htmlcomment'/).
      to_stderr_from_any_process
  end

  it "lexes in :inclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, _token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, _token|
        lexer.pop_state
      end

      error(:htmlcomment) do |_lexer, _token|
      end

      ignore(:htmlcomment, " \t")

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    # Inclusive state keeps the default rules active, so 'comment' is lexed.
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'comment', 1, 12],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "includes error condition in :inclusive state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      rule(:htmlcomment, /<!--/) do |lexer, _token|
        lexer.push_state(:htmlcomment)
      end

      rule(:htmlcomment_end, /-->/) do |lexer, _token|
        lexer.pop_state
      end

      # The default error handler is inherited by the inclusive state.
      error do |_lexer, _token| end

      ignore " \t"
    end)
    my_lexer = MyLexer.new
    expect(my_lexer.lex("hello <!-- comment --> world").map(&:to_ary)).to eq([
      [:WORD, 'hello', 1, 1],
      [:WORD, 'comment', 1, 12],
      [:WORD, 'world', 1, 24]
    ])
  end

  it "complains if there are no rules for state" do
    stub_const('MyLexer', Class.new(Lex::Lexer) do
      tokens( :WORD )

      states( htmlcomment: :inclusive )

      rule(:WORD, /\w+/)

      error do |_lexer, _token| end

      ignore " \t"
    end)
    expect {
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No rules defined for state 'htmlcomment'/)
  end
end
# coding: utf-8

require 'spec_helper'

RSpec.describe Lex::Lexer, '#tokens' do
  # Fixed description grammar: "list tokens" -> "list of tokens".
  it "requires a non-empty list of tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens()
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /No token list defined/)
  end

  it "requires a list of valid tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:"#token")
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /Bad token name `#token`/)
  end

  it "doesn't allow for multiple same tokens" do
    expect {
      stub_const('MyLexer', Class.new(Lex::Lexer) do
        tokens(:token, :token)
      end)
      MyLexer.new
    }.to raise_error(Lex::Linter::Failure, /Token `token` already defined/)
  end
end
data/tasks/console.rake
ADDED
data/tasks/coverage.rake
ADDED
data/tasks/spec.rake
ADDED
# encoding: utf-8

begin
  require 'rspec/core/rake_task'

  desc 'Run all specs'
  RSpec::Core::RakeTask.new(:spec) do |task|
    task.pattern = 'spec/{unit,integration}{,/*/**}/*_spec.rb'
  end

  namespace :spec do
    # Generate the per-suite tasks from one table to avoid repetition.
    { unit: 'Run unit specs', integration: 'Run integration specs' }.each do |suite, description|
      desc description
      RSpec::Core::RakeTask.new(suite) do |task|
        task.pattern = "spec/#{suite}{,/*/**}/*_spec.rb"
      end
    end
  end

rescue LoadError
  # Without rspec installed, define stub tasks that explain how to get it.
  %w[spec spec:unit spec:integration].each do |name|
    task name do
      $stderr.puts "In order to run #{name}, do `gem install rspec`"
    end
  end
end