RubyGems - rley - Versions diffs - 0.5.04 → 0.5.05 - Mend

rley 0.5.04 → 0.5.05

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +10 -0
data/examples/general/calc_iter1/calc_demo.rb +1 -1
data/examples/general/calc_iter2/calc_ast_builder.rb +200 -0
data/examples/general/calc_iter2/calc_ast_nodes.rb +156 -0
data/examples/general/calc_iter2/calc_demo.rb +66 -0
data/examples/general/calc_iter2/calc_grammar.rb +31 -0
data/examples/general/calc_iter2/calc_lexer.rb +78 -0
data/examples/general/calc_iter2/calc_parser.rb +24 -0
data/examples/general/calc_iter2/spec/calculator_spec.rb +113 -0
data/lib/rley/constants.rb +1 -1
data/lib/rley/gfg/grm_flow_graph.rb +41 -12
data/lib/rley/gfg/vertex.rb +11 -4
data/lib/rley/parser/gfg_parsing.rb +29 -1
data/lib/rley/parser/parse_entry_set.rb +2 -1
data/lib/rley/parser/parse_forest_factory.rb +7 -0
data/lib/rley/parser/parse_rep_creator.rb +8 -2
data/lib/rley/parser/parse_tree_builder.rb +5 -3
data/lib/rley/parser/parse_tree_factory.rb +1 -1
data/lib/rley/parser/parse_walker_factory.rb +15 -10
data/spec/rley/parser/ambiguous_parse_spec.rb +1 -1
data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
data/spec/rley/parser/gfg_parsing_spec.rb +1 -1
data/spec/rley/parser/groucho_spec.rb +1 -1
data/spec/rley/parser/parse_forest_builder_spec.rb +1 -1
data/spec/rley/parser/parse_walker_factory_spec.rb +148 -11
metadata +9 -2

data/examples/general/calc_iter2/calc_grammar.rb ADDED

@@ -0,0 +1,31 @@
+# Grammar for simple arithmetical expressions
+require 'rley' # Load the gem
+########################################
+# Define a grammar for basic arithmetical expressions
+builder = Rley::Syntax::GrammarBuilder.new do
+  add_terminals('NUMBER')
+  add_terminals('LPAREN', 'RPAREN') # For '(', ')' delimiters
+  add_terminals('PLUS', 'MINUS') # For '+', '-' operators or sign
+  add_terminals('STAR', 'DIVIDE', 'POWER') # For '*', '/', '**' operators
+  rule 'expression' => %w[simple_expression]
+  rule 'simple_expression' => 'term'
+  rule 'simple_expression' => %w[simple_expression add_operator term]
+  rule 'term' => 'factor'
+  rule 'term' => %w[term mul_operator factor]
+  rule 'factor' => 'simple_factor'
+  rule 'factor' => %w[simple_factor POWER simple_factor]
+  rule 'simple_factor' => %w[sign NUMBER]
+  rule 'simple_factor' => %w[LPAREN expression RPAREN]
+  rule 'simple_factor' => %w[MINUS LPAREN expression RPAREN]
+  rule 'sign' => 'PLUS'
+  rule 'sign' => 'MINUS'
+  rule 'sign' => []
+  rule 'add_operator' => 'PLUS'
+  rule 'add_operator' => 'MINUS'
+  rule 'mul_operator' => 'STAR'
+  rule 'mul_operator' => 'DIVIDE'
+end
+# And now build the grammar...
+CalcGrammar = builder.grammar

data/examples/general/calc_iter2/calc_lexer.rb ADDED

@@ -0,0 +1,78 @@
+# File: calc_lexer.rb
+# Lexer for a basic arithmetical expression parser
+require 'strscan'
+require 'rley' # Load the gem
+class CalcLexer
+  attr_reader(:scanner)
+  attr_reader(:lineno)
+  attr_reader(:line_start)
+  attr_reader(:name2symbol)
+  @@lexeme2name = {
+    '(' => 'LPAREN',
+    ')' => 'RPAREN',
+    '+' => 'PLUS',
+    '-' => 'MINUS',
+    '*' => 'STAR',
+    '/' => 'DIVIDE',
+    '**' => 'POWER'
+  }.freeze
+  class ScanError < StandardError; end
+  def initialize(source, aGrammar)
+    @scanner = StringScanner.new(source)
+    @name2symbol = aGrammar.name2symbol
+    @lineno = 1
+  end
+  def tokens()
+    tok_sequence = []
+    until @scanner.eos?
+      token = _next_token
+      tok_sequence << token unless token.nil?
+    end
+    return tok_sequence
+  end
+  private
+  def _next_token()
+    skip_whitespaces
+    curr_ch = scanner.peek(1)
+    return nil if curr_ch.nil?
+    token = nil
+    if '()+-/'.include? curr_ch
+      # Single character token
+      token = build_token(@@lexeme2name[curr_ch], scanner.getch)
+    elsif (lexeme = scanner.scan(/\*\*/))
+      token = build_token(@@lexeme2name[lexeme], lexeme)
+    elsif (lexeme = scanner.scan(/\*/))
+      token = build_token(@@lexeme2name[lexeme], lexeme)
+    elsif (lexeme = scanner.scan(/[0-9]+(\.[0-9]+)?([eE][-+]?[0-9])?/))
+      token = build_token('NUMBER', lexeme)
+    else # Unknown token
+      erroneous = curr_ch.nil? ? '' : curr_ch
+      sequel = scanner.scan(/.{1,20}/)
+      erroneous += sequel unless sequel.nil?
+      raise ScanError.new("Unknown token #{erroneous}")
+    end
+    return token
+  end
+  def build_token(aSymbolName, aLexeme)
+    token_type = name2symbol[aSymbolName]
+    return Rley::Tokens::Token.new(aLexeme, token_type)
+  end
+  def skip_whitespaces()
+    scanner.scan(/[ \t\f\n\r]+/)
+  end
+end # class

data/examples/general/calc_iter2/calc_parser.rb ADDED

@@ -0,0 +1,24 @@
+# Purpose: to demonstrate how to build a parser for simple arithmetic
+# expressions
+require_relative 'calc_lexer'
+require_relative 'calc_grammar'
+# A parser for arithmetic expressions
+class CalcParser < Rley::Parser::GFGEarleyParser
+  attr_reader(:source_file)
+  # Constructor
+  def initialize()
+    # Build the Earley parser with the calculator grammar
+    super(CalcGrammar)
+  end
+  def parse_expression(aText)
+    lexer = CalcLexer.new(aText, grammar)
+    result = parse(lexer.tokens)
+    return result
+  end
+end # class
+# End of file

data/examples/general/calc_iter2/spec/calculator_spec.rb ADDED

@@ -0,0 +1,113 @@
+require 'rspec' # Use the RSpec framework
+require_relative '../calc_parser' # Load the class under test
+require_relative '../calc_ast_builder'
+RSpec.configure do |config|
+  # Display stack trace in case of failure
+  config.full_backtrace = true
+end
+describe 'Calculator' do
+  def parse_expression(anExpression)
+    # Create a calculator parser object
+    parser = CalcParser.new
+    result = parser.parse_expression(anExpression)
+    unless result.success?
+      # Stop if the parse failed...
+      puts "Parsing of '#{anExpression}' failed"
+      puts "Reason: #{result.failure_reason.message}"
+      exit(1)
+    end
+    return result
+  end
+  def print_cst(aParseResult)
+    # Generate a parse tree from the parse result
+    ptree = aParseResult.parse_tree
+    # Let's create a parse tree visitor
+    visitor = Rley::ParseTreeVisitor.new(ptree)
+    # Now output formatted parse tree
+    renderer = Rley::Formatter::Asciitree.new($stdout)
+    renderer.render(visitor)
+  end
+  def build_ast(aParseResult)
+    tree_builder = CalcASTBuilder
+    # Generate an abstract syntax tree from the parse result
+    ast = aParseResult.parse_tree(tree_builder)
+    return ast.root
+  end
+  def expect_expr(anExpression)
+    parsing = parse_expression(anExpression)
+    ast = build_ast(parsing)
+    return expect(ast.interpret)
+  end
+  context 'Parsing valid expressions' do
+    it 'should evaluate simple number literals' do
+      expect_expr('2').to eq(2)
+    end
+    it 'should evaluate positive number literals' do
+      expect_expr('+2').to eq(2)
+      expect_expr('+ 2').to eq(2)
+    end
+    it 'should evaluate negative number literals' do
+      expect_expr('-2').to eq(-2)
+      expect_expr('- 2').to eq(-2)
+    end
+    it 'should evaluate addition' do
+      expect_expr('2 + 2').to eq(4)
+    end
+    it 'should evaluate subtraction' do
+      expect_expr('2.1 - 2').to be_within(0.000000000000001).of(0.1)
+    end
+    it 'handles negative numbers' do
+       expect_expr('3--2').to eq(5)
+    end
+    it 'should evaluate division' do
+      expect_expr('10.5 / 5').to eq(2.1)
+    end
+    it 'should evaluate multiplication' do
+      expect_expr('2 * 3.1').to eq(6.2)
+    end
+    it 'should evaluate exponentiation' do
+      expect_expr('5 ** (3 - 1)').to eq(25)
+      expect_expr('25 ** 0.5').to eq(5)
+    end
+    it 'should change sign of expression in parentheses' do
+      expect_expr('- (2 * 5)').to eq(-10)
+    end
+    it 'should evaluate parentheses' do
+      expect_expr('2 * (2.1 + 1)').to eq(6.2)
+    end
+    it 'should evaluate regardless of whitespace' do
+      expect_expr("2*(1+\t1)").to eq(4)
+    end
+    it 'should evaluate order of operations' do
+      expect_expr('2 * 2.1 + 1 / 2').to eq 4.7
+    end
+    it 'should evaluate multiple levels of parentheses' do
+      expect_expr('2*(1/(1+3))').to eq(0.5)
+    end
+  end # context
+end # describe
+# End of file

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.5.04'.freeze
+  Version = '0.5.05'.freeze
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm".freeze

data/lib/rley/gfg/grm_flow_graph.rb CHANGED

@@ -10,7 +10,30 @@ require_relative 'shortcut_edge'
 module Rley # This module is used as a namespace
   module GFG # This module is used as a namespace
-    # TODO: add definition
+    # A Grammar Flow Graph (GFG) represents the parsing states of production
+    # rules from a context-free grammar. This representation is based on a
+    # directed graph structure. The parsing process can then be re-formulated
+    # as a path problem in the graph. The theory behind GFGs can be found in
+    # papers. The first article on GFG can be found here:
+    # https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
+    # There are three types of vertex in a GFG:
+    # start vertex, end vertex and item vertex.
+    # For each non-terminal symbol N of the grammar, there is:
+    # a start vertex with label '.N'
+    # an end vertex with label 'N.'
+    # For each production rule of the grammar:
+    # N => s1 s2 s3 (...) sk
+    # I.e. a rule with k grammar symbols in its right-hand side.
+    # For such a rule there will be k + 1 item vertices. By convention,
+    # the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
+    # the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
+    # the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
+    # and so on. In other words, the labels are obtained by moving a dot
+    # in successive positions in the rhs. The dot represents the
+    # parse progress for the production rule. Symbols on the left of the
+    # dot represent the symbols that were successfully matched in the input.
+    # A GFG has three types of directed edges linking the vertices.
+    # call edge, return edge and scan edge.
     class GrmFlowGraph
       # The set of all vertices in the graph
       attr_reader :vertices
@@ -24,6 +47,9 @@ module Rley # This module is used as a namespace
       # A Hash with pairs of the form: non-terminal symbol => end node
       attr_reader :end_vertex_for
+      # Constructor.
+      # @param theDottedItems [Array<DottedItem>] an array of the dotted items
+      # of the grammar.
       def initialize(theDottedItems)
         @vertices = []
         @start_vertex_for = {}
@@ -32,7 +58,9 @@ module Rley # This module is used as a namespace
         build_graph(theDottedItems)
       end
-      # Return the vertex with given vertex label.
+      # Retrieve the vertex with given vertex label.
+      # @param aVertexLabel [String] the label of a vertex from the graph
+      # @return [Vertex] the vertex with the given label, otherwise nil.
       def find_vertex(aVertexLabel)
         vertices.find { |a_vertex| a_vertex.label == aVertexLabel }
       end
@@ -42,7 +70,7 @@ module Rley # This module is used as a namespace
       # If one wants to remove useless rules, then do first:
       # elimination of non-generating symbols
       # then elimination of unreachable symbols
-      def diagnose
+      def diagnose()
         mark_unreachable_symbols
       end
@@ -65,15 +93,6 @@ module Rley # This module is used as a namespace
           return next_one
         end
       end
-      def print_vertex(aText, aVertex)
-        print aText + ' '
-        if aVertex.kind_of?(NonTerminalVertex)
-          puts "#{aVertex.class} #{aVertex.non_terminal.name}"
-        else
-          p(aVertex.label)
-        end
-      end
       # Walk over all the vertices of the graph that are reachable from a given
       # start vertex. This is a depth-first graph traversal.
@@ -137,6 +156,16 @@ module Rley # This module is used as a namespace
         @start_vertex = aVertex if vertices.empty?
         vertices << aVertex
       end
+      # For debugging purposes
+      def print_vertex(aText, aVertex)
+        print aText + ' '
+        if aVertex.kind_of?(NonTerminalVertex)
+          puts "#{aVertex.class} #{aVertex.non_terminal.name}"
+        else
+          p(aVertex.label)
+        end
+      end
       def build_graph(theDottedItems)
         build_all_starts_ends(theDottedItems)

data/lib/rley/gfg/vertex.rb CHANGED

@@ -6,30 +6,37 @@ module Rley # This module is used as a namespace
     # - To know its label
     class Vertex
       # The edges linking the successor vertices to this one.
+      # @!attribute [r] edges
+      # @return [Array<Edge>] The edge(s) linking this vertex to successor(s)
       attr_reader :edges
+      # Constructor to override.
       def initialize()
         @edges = []
       end
-      # Add an graph edge to this vertex
+      # Add a graph edge to this vertex.
+      # @param anEdge [Edge] the edge to be added.
       def add_edge(anEdge)
         arrow = check_add_edge(anEdge)
         edges << arrow
       end
-      # Returns true iff the vertex corresponds to an dotted item that has
+      # Determine if the vertex corresponds to a dotted item that has
       # its dot at the end of a production (i.e. is a reduced item).
+      # @return [Boolean] true iff vertex corresponds to reduced item.
       def complete?()
         return false # Default implementation
       end
-      # Return the symbol before the dot else nil.
+      # Retrieve the grammar symbol before the dot.
+      # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
       def prev_symbol()
         return nil # Default implementation
       end
-      # Return the symbol after the dot else nil.
+      # Retrieve the grammar symbol after the dot.
+      # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
       def next_symbol()
         return nil # Default implementation
       end

data/lib/rley/parser/gfg_parsing.rb CHANGED

@@ -27,7 +27,9 @@ module Rley # This module is used as a namespace
       # The reason of a parse failure
       attr_reader(:failure_reason)
+      # Constructor
+      # @param theGFG [GrmFlowGraph] the Grammar Flow Graph
+      # @param theTokens [Array<Token>] the array of input tokens
       def initialize(theGFG, theTokens)
         @gf_graph = theGFG
         @tokens = theTokens.dup
@@ -46,8 +48,34 @@ module Rley # This module is used as a namespace
         next_symbol = anEntry.next_symbol
         start_vertex = gf_graph.start_vertex_for[next_symbol]
         pos = aPosition
+        size_before = chart[pos].size
         apply_rule(anEntry, start_vertex, pos, pos, :call_rule)
+        if next_symbol.nullable? && anEntry.dotted_entry?
+          size_after = chart[pos].size
+          # ...apply the Nullable rule
+          nullable_rule(anEntry, aPosition) if size_after == size_before
+        end
       end
+      # Let the current sigma set be the ith parse entry set.
+      # This method is invoked when a dotted entry is added
+      # to the parse entry set of the form [A => alpha . B beta, k]
+      # and B is nullable.
+      # Then the entry [A => alpha B . beta, k] is added to the current
+      # sigma set.
+      def nullable_rule(anEntry, aPosition)
+        next_symbol = anEntry.next_symbol
+        end_vertex = gf_graph.end_vertex_for[next_symbol]
+        pos = aPosition
+        end_entry = push_entry(end_vertex, anEntry.origin, pos, :nullable_rule)
+        curr_vertex = anEntry.vertex
+        next_vertex = curr_vertex.shortcut.successor
+        # first pos == origin
+        # second pos == position
+        apply_rule(end_entry, next_vertex, anEntry.origin, pos, :nullable_rule)
+      end
       # Let the current sigma set be the ith parse entry set.
       # This method is invoked when an entry is added to a parse entry set

data/lib/rley/parser/parse_entry_set.rb CHANGED

@@ -40,8 +40,9 @@ module Rley # This module is used as a namespace
       # Append the given entry (if it isn't yet in the set)
       # to the list of parse entries
       # @param anEntry [ParseEntry] the parse entry to push.
-      # @return [ParseEntry] the passed parse entry it doesn't   added
+      # @return [ParseEntry] the passed parse entry if it was pushed
       def push_entry(anEntry)
+        # TODO: control overhead next line
         match = entries.find { |entry| entry == anEntry }
         if match
           result = match