RubyGems - rly - Versions diffs - 0.1.0 → 0.2.0 - Mend

rly 0.1.0 → 0.2.0

Files changed (24) hide show

data/README.md +2 -2
data/assets/ply_dump.erb +15 -0
data/lib/rly.rb +2 -0
data/lib/rly/lex.rb +54 -25
data/lib/rly/lex_token.rb +8 -0
data/lib/rly/parse/grammar.rb +211 -0
data/lib/rly/parse/lr_item.rb +32 -0
data/lib/rly/parse/lr_table.rb +529 -0
data/lib/rly/parse/ply_dump.rb +52 -0
data/lib/rly/parse/production.rb +38 -0
data/lib/rly/parse/rule_parser.rb +68 -0
data/lib/rly/parse/yacc_production.rb +11 -0
data/lib/rly/parse/yacc_symbol.rb +6 -0
data/lib/rly/version.rb +2 -1
data/lib/rly/yacc.rb +355 -0
data/spec/lex/{lexer_spec.rb → lex_spec.rb} +45 -24
data/spec/parse/calc_spec.rb +95 -0
data/spec/parse/grammar_spec.rb +239 -0
data/spec/parse/lr_table_spec.rb +212 -0
data/spec/parse/production_spec.rb +18 -0
data/spec/parse/rule_parser_spec.rb +20 -0
data/spec/parse/yacc_spec.rb +57 -0
data/spec/spec_helper.rb +5 -0
metadata +26 -4

data/spec/lex/{lexer_spec.rb → lex_spec.rb} RENAMED

@@ -1,19 +1,19 @@
 require "rly"
 describe Rly::Lex do
-  context "Simple Lexer" do
+  context "Basic lexer" do
     testLexer = Class.new(Rly::Lex) do
       token :FIRST, /[a-z]+/
       token :SECOND, /[A-Z]+/
     end
-    it "should have a list of defined tokens" do
+    it "has a list of defined tokens" do
       testLexer.tokens.map { |t, r, b| t }.should == [:FIRST, :SECOND]
     end
-    it "should output tokens one by one" do
+    it "outputs tokens one by one" do
       test = 'qweASDzxc'
-      l = testLexer.new(test).to_enum
+      l = testLexer.new(test)
       tok = l.next
       tok.type.should == :FIRST
@@ -27,40 +27,48 @@ describe Rly::Lex do
       tok.type.should == :FIRST
       tok.value.should == 'zxc'
-      expect { l.next } .to raise_error(StopIteration)
+      l.next.should be_nil
+    end
+    it "provides tokens in terminals list" do
+      testLexer.terminals.should == [:FIRST, :SECOND]
     end
   end
-  context "Literals Lexer" do
+  context "Lexer with literals defined" do
     testLexer = Class.new(Rly::Lex) do
       literals "+-*/"
     end
-    it "should output literal tokens" do
+    it "outputs literal tokens" do
       test = '++--'
-      l = testLexer.new(test).to_enum
+      l = testLexer.new(test)
       l.next.value.should == '+'
       l.next.value.should == '+'
       l.next.value.should == '-'
       l.next.value.should == '-'
     end
+    it "provides literals in terminals list" do
+      testLexer.terminals.should == ['+', '-', '*', '/']
+    end
   end
-  context "Ignores Lexer" do
+  context "Lexer with ignores defined" do
     testLexer = Class.new(Rly::Lex) do
       ignore " \t"
     end
-    it "should honour ignores list" do
+    it "honours ignores list" do
       test = "     \t\t  \t    \t"
-      l = testLexer.new(test).to_enum
+      l = testLexer.new(test)
-      expect { l.next } .to raise_error(StopIteration)
+      l.next.should be_nil
     end
   end
-  context "Block-based Token Lexer" do
+  context "Lexer with token that has a block given" do
     testLexer = Class.new(Rly::Lex) do
       token :TEST, /\d+/ do |t|
         t.value = t.value.to_i
@@ -70,44 +78,46 @@ describe Rly::Lex do
     it "calls a block to further process a token" do
       test = "42"
-      l = testLexer.new(test).to_enum
+      l = testLexer.new(test)
-      l.next.value == 42
+      l.next.value.should == 42
     end
   end
-  context "Non-outputtable tokens Lexer" do
+  context "Lexer with unnamed token and block given" do
     testLexer = Class.new(Rly::Lex) do
       token /\n+/ do |t| t.lexer.lineno = t.value.count("\n"); t end
     end
-    it "process but don't output tokens without a name" do
+    it "processes but don't output tokens without a name" do
       test = "\n\n\n"
       l = testLexer.new(test)
-      expect { l.to_enum.next } .to raise_error(StopIteration)
+      l.next.should be_nil
       l.lineno.should == 3
     end
   end
-  context "Error handling" do
+  context "Lexer with no error handler" do
     it "raises an error, if there are no suitable tokens" do
       testLexer = Class.new(Rly::Lex) do
         token :NUM, /\d+/
       end
       l = testLexer.new("test")
-      expect { l.to_enum.next } .to raise_error(Rly::LexError)
+      expect { l.next } .to raise_error(Rly::LexError)
     end
     it "raises an error, if there is no possible tokens defined" do
       testLexer = Class.new(Rly::Lex) do ; end
       l = testLexer.new("test")
-      expect { l.to_enum.next } .to raise_error(Rly::LexError)
+      expect { l.next } .to raise_error(Rly::LexError)
     end
+  end
+  context "Lexer with error handler" do
     it "calls an error function if it is available, which returns a fixed token" do
       testLexer = Class.new(Rly::Lex) do
         token :NUM, /\d+/
@@ -119,11 +129,11 @@ describe Rly::Lex do
       end
       l = testLexer.new("test")
-      tok = l.to_enum.next
+      tok = l.next
       tok.value.should == "BAD t"
       tok.type.should == :error
-      tok = l.to_enum.next
+      tok = l.next
       tok.value.should == "BAD e"
       tok.type.should == :error
     end
@@ -138,7 +148,18 @@ describe Rly::Lex do
       end
       l = testLexer.new("test1")
-      l.to_enum.next.value.should == '1'
+      l.next.value.should == '1'
     end
   end
+  it "doesn't try to skip chars over" do
+    testLexer = Class.new(Rly::Lex) do
+        token :NUM, /\d+/
+        literals ","
+      end
+      l = testLexer.new(",10")
+      l.next.type.should == ','
+      l.next.type.should == :NUM
+  end
 end

data/spec/parse/calc_spec.rb ADDED

@@ -0,0 +1,95 @@
+require "rly"
+module CalcSpecExample
+  class CalcLex < Rly::Lex
+    literals '=+-*/()'
+    ignore " \t"
+    token :NAME, /[a-zA-Z_][a-zA-Z0-9_]*/
+    token :NUMBER, /\d+/ do |t|
+      t.value = t.value.to_i
+      t
+    end
+    token(/\n+/) { |t| t.lexer.lineno += t.value.count("\n") }
+    on_error do |t|
+      puts "Illegal character #{t.value}"
+      t.lexer.pos += 1
+    end
+  end
+  class CalcParse < Rly::Yacc # calculator grammar exercised by the 'Calculator' examples
+    def names
+      @names ||= {} # lazily created variable table, written by the assignment rule and read by the NAME rule
+    end
+    precedence :left,  '+', '-'
+    precedence :left,  '*', '/'
+    precedence :right, :UMINUS # NOTE(review): UMINUS is only referenced by the commented-out unary minus rule
+    rule 'statement : NAME "=" expression' do |st, n, _, e|
+      self.names[n.value] = e.value # stores the expression value under the variable name; st.value is not assigned here
+    end
+    rule 'statement : expression' do |st, e|
+      st.value = e.value # a bare expression statement takes the expression's value
+    end
+    rule 'expression : expression "+" expression
+                     | expression "-" expression
+                     | expression "*" expression
+                     | expression "/" expression' do |ex, e1, op, e2|
+      ex.value = e1.value.send(op.value, e2.value) # op.value names the operator method invoked on the left operand
+    end
+    # rule 'expression : "-" expression %prec UMINUS' do |ex, _, e|
+    #   ex.value = - e.value
+    # end
+    rule 'expression : "(" expression ")"' do |ex, _, e, _|
+      ex.value = e.value # parentheses only group; the inner value passes through
+    end
+    rule 'expression : NUMBER' do |ex, n|
+      ex.value = n.value
+    end
+    rule 'expression : NAME' do |ex, n|
+      nval = self.names[n.value] # look the variable up in the table filled by assignments
+      unless nval
+        puts "Undefined name '#{n.value}'"
+        nval = 0 # unknown names evaluate as 0 after the message above
+      end
+      ex.value = nval
+    end
+    # rule_error do |p|
+    #   if p
+    #     puts "Syntax error at '#{p.value}'"
+    #   else
+    #     puts "Syntax error at EOF"
+    #   end
+    # end
+  end
+end
+describe 'Calculator' do
+  before :each do
+    @calc = CalcSpecExample::CalcParse.new(CalcSpecExample::CalcLex.new)
+  end
+  it "calculates simple expressions" do
+    @calc.parse('2 + 2').should == 4
+  end
+  it "calculates complex expressions" do
+    @calc.parse('(3-1)*6/(3+1)').should == 3
+  end
+  it "keeps state between parses" do
+    @calc.parse('magic = 42')
+    @calc.parse('2 * magic').should == 84
+  end
+end

data/spec/parse/grammar_spec.rb ADDED

@@ -0,0 +1,239 @@
+require "rly"
+require "rly/parse/grammar"
+require "rly/parse/ply_dump"
+describe Rly::Grammar do
+  it "requires a list of terminals to be initialized" do
+    g = Rly::Grammar.new([:NUMBER])
+    g.terminals[:NUMBER].should_not be_nil
+  end
+  it "rejects terminals named in lowercase" do
+    expect { Rly::Grammar.new([:test]) } .to raise_error(ArgumentError)
+  end
+  it "has a default terminal -- error" do
+    g = Rly::Grammar.new([])
+    g.terminals[:error].should_not be_nil
+  end
+  context "Precedence specs" do
+    it "allows to set precedence" do
+      g = Rly::Grammar.new([])
+      g.set_precedence('+', :left, 1)
+    end
+    it "does not allow to set precedence after any productions have been added" do
+      g = Rly::Grammar.new([])
+      g.add_production(:expression, [:expression, '+', :expression])
+      expect { g.set_precedence('+', :left, 1) } .to raise_error(RuntimeError)
+    end
+    it "does not allow setting precedence several times for same terminal" do
+      g = Rly::Grammar.new([])
+      g.set_precedence('+', :left, 1)
+      expect { g.set_precedence('+', :left, 1) } .to raise_error(ArgumentError)
+    end
+    it "allows setting only :left, :right or :noassoc precedence associations" do
+      g = Rly::Grammar.new([])
+      expect { g.set_precedence('+', :bad, 1) } .to raise_error(ArgumentError)
+    end
+  end
+  # Examples for Grammar#add_production: validation of names and symbols,
+  # precedence lookup, and the bookkeeping tables updated for each production.
+  context "Production specs" do
+    it "returns a Production object when adding production" do
+      g = Rly::Grammar.new([])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      p.should be_a(Rly::Production)
+    end
+    it "rejects productions not named in lowercase" do
+      g = Rly::Grammar.new([])
+      expect { g.add_production(:BAD, []) } .to raise_error(ArgumentError)
+    end
+    it "rejects production named :error" do
+      g = Rly::Grammar.new([])
+      expect { g.add_production(:error, []) } .to raise_error(ArgumentError)
+    end
+    it "registers one-char terminals" do
+      g = Rly::Grammar.new([])
+      g.add_production(:expression, [:expression, '+', :expression])
+      g.terminals['+'].should_not be_nil
+    end
+    it "raises ArgumentError if one-char terminal is not actually an one char" do
+      g = Rly::Grammar.new([])
+      expect { g.add_production(:expression, [:expression, 'lulz', :expression]) } .to raise_error(ArgumentError)
+    end
+    it "calculates production precedence based on rightmost terminal" do
+      g = Rly::Grammar.new([])
+      g.set_precedence('+', :left, 1)
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      p.precedence.should == [:left, 1]
+    end
+    it "defaults precedence to [:right, 0]" do
+      g = Rly::Grammar.new([])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      p.precedence.should == [:right, 0]
+    end
+    it "adds production to the list of productions" do
+      g = Rly::Grammar.new([])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      # Two entries after a single add_production call.
+      # NOTE(review): presumably index 0 is held back for the S' rule -- confirm.
+      g.productions.count.should == 2
+      # A bare `==` is evaluated and discarded; `.should` turns it into an assertion.
+      g.productions.last.should == p
+    end
+    it "adds production to the list of productions referenced by names" do
+      g = Rly::Grammar.new([])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      g.prodnames.count.should == 1
+      g.prodnames[:expression].should == [p]
+    end
+    it "adds production to the list of non-terminals" do
+      g = Rly::Grammar.new([])
+      g.add_production(:expression, [:expression, '+', :expression])
+      g.nonterminals[:expression].should_not be_nil
+    end
+    it "adds production number to referenced terminals" do
+      g = Rly::Grammar.new([])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      g.terminals['+'].should == [p.index]
+    end
+    it "adds production number to referenced non-terminals" do
+      g = Rly::Grammar.new([])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      # :expression occurs twice on the right-hand side, hence two entries.
+      g.nonterminals[:expression].should == [p.index, p.index]
+    end
+    it "does not allow duplicate rules" do
+      g = Rly::Grammar.new([])
+      g.add_production(:expression, [:expression, '+', :expression])
+      expect { g.add_production(:expression, [:expression, '+', :expression]) } .to raise_error(ArgumentError)
+    end
+  end
+  context "Start symbol specs" do
+    before :each do
+      @g = Rly::Grammar.new([])
+      p = @g.add_production(:expression, [:expression, '+', :expression])
+      @g.set_start()
+    end
+    it "sets start symbol if it is specified explicitly" do
+      @g.start.should == :expression
+    end
+    it "sets start symbol based on first production if it is not specified explicitly" do
+      @g.start.should == :expression
+    end
+    it "accepts only existing non-terminal as a start" do
+      g = Rly::Grammar.new([:NUMBER])
+      p = g.add_production(:expression, [:expression, '+', :expression])
+      expect { g.set_start(:NUMBER) } .to raise_error(ArgumentError)
+      expect { g.set_start(:new_sym) } .to raise_error(ArgumentError)
+    end
+    it "sets zero rule to :S' -> :start" do
+      prod_0 = @g.productions[0]
+      prod_0.index.should == 0
+      prod_0.name.should == :"S'"
+      prod_0.prod.should == [:expression]
+    end
+    it "adds 0 to start rule nonterminals" do
+      @g.nonterminals[:expression][-1].should == 0
+    end
+  end
+  context "LR table generation specs" do
+    before :each do # builds a small statement/expression grammar over NUMBER and prepares its LR items
+      @g = Rly::Grammar.new([:NUMBER])
+      @g.set_precedence('+', :left, 1)
+      @g.set_precedence('-', :left, 1)
+      @g.add_production(:statement, [:expression])
+      @g.add_production(:expression, [:expression, '+', :expression])
+      @g.add_production(:expression, [:expression, '-', :expression])
+      @g.add_production(:expression, [:NUMBER])
+      @g.set_start # no explicit start symbol is passed
+      @g.build_lritems
+    end
+    it "builds LR items for grammar" do
+      @g.productions.length.should == 5 # the four productions added above plus production 0
+      items = [2, 2, 4, 4, 2] # expected item count per production: right-hand-side length + 1
+      @g.productions.each_with_index do |p, i|
+        p.lr_items.count.should == items[i]
+      end
+    end
+    it "sets LR items to correct default values" do
+      i = @g.productions[0].lr_items[0] # production 0, dot in front of the right-hand side
+      i.lr_after.should == [@g.productions[1]] # the single statement production
+      i.prod.should == [:'.', :statement]
+      i = @g.productions[0].lr_items[1] # production 0, dot at the end
+      i.lr_after.should == [] # nothing follows the dot
+      i.prod.should == [:statement, :'.']
+      i = @g.productions[2].lr_items[0] # first item of the addition production
+      i.lr_after.should == @g.productions[2..4] # all three expression productions
+      i.prod.should == [:'.', :expression, '+', :expression]
+    end
+    it "builds correct FIRST table" do
+      first = @g.compute_first
+      first.should == {
+        :'$end' => [:'$end'],
+        '+' => ['+'],
+        '-' => ['-'],
+        :NUMBER => [:NUMBER],
+        :error => [:error],
+        :expression => [:NUMBER],
+        :statement => [:NUMBER]
+      }
+    end
+    it "builds correct FOLLOW table" do
+      @g.compute_first # NOTE(review): presumably compute_follow needs FIRST to be computed beforehand -- confirm
+      follow = @g.compute_follow
+      follow.should == { :expression => [:'$end', '+', '-'], :statement => [:'$end'] }
+    end
+  end
+  it "should generate parser.out same as Ply does" do
+    pending "thx to python dicts we have a different order of states. ideas?"
+    g = Rly::Grammar.new([:NUMBER])
+    g.set_precedence('+', :left, 1)
+    g.set_precedence('-', :left, 1)
+    g.add_production(:statement, [:expression])
+    g.add_production(:expression, [:expression, '+', :expression])
+    g.add_production(:expression, [:expression, '-', :expression])
+    g.add_production(:expression, [:NUMBER])
+    g.set_start
+    d = Rly::PlyDump.new(g)
+    orig = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out')
+    dst = File.join(File.dirname(__FILE__), '..', 'fixtures', 'minicalc_ply_parser.out.new')
+    open(dst, 'w') { |f| f.write(d.to_s) }
+    d.to_s.should == open(orig).read
+  end
+end