RubyGems - rley - Versions diffs - 0.5.07 → 0.5.08 - Mend

rley 0.5.07 → 0.5.08

Files changed (35) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +6 -0
data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
data/examples/general/SRL/lib/ast_builder.rb +176 -0
data/examples/general/SRL/lib/ast_building.rb +20 -0
data/examples/general/SRL/lib/grammar.rb +32 -0
data/examples/general/SRL/lib/parser.rb +26 -0
data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
data/examples/general/SRL/lib/regex_repr.rb +1 -0
data/examples/general/SRL/lib/srl_demo.rb +67 -0
data/examples/general/SRL/lib/tokenizer.rb +101 -0
data/examples/general/SRL/spec/integration_spec.rb +103 -0
data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
data/examples/general/SRL/spec/spec_helper.rb +25 -0
data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
data/examples/general/SRL/srl_demo.rb +57 -0
data/examples/general/calc_iter1/calc_demo.rb +1 -1
data/examples/general/calc_iter2/ast_building.rb +20 -0
data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
data/examples/general/calc_iter2/calc_demo.rb +1 -1
data/lib/rley/base/base_parser.rb +1 -1
data/lib/rley/base/grm_items_builder.rb +1 -1
data/lib/rley/constants.rb +1 -1
data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
data/lib/rley/parser/gfg_chart.rb +8 -3
data/lib/rley/parser/gfg_earley_parser.rb +5 -2
data/lib/rley/parser/gfg_parsing.rb +5 -1
data/lib/rley/parser/parse_tree_builder.rb +16 -5
data/lib/rley/ptree/terminal_node.rb +3 -2
data/spec/rley/parser/ast_builder_spec.rb +2 -2
data/spec/rley/parser/cst_builder_spec.rb +2 -3
metadata +20 -4

data/examples/general/SRL/spec/spec_helper.rb ADDED

@@ -0,0 +1,25 @@
+# File: spec_helper.rb
+# Purpose: utility file that is loaded by all our RSpec files
+require 'simplecov'
+SimpleCov.formatter = SimpleCov::Formatter::MultiFormatter.new(
+  [
+    SimpleCov::Formatter::HTMLFormatter,
+  ]
+)
+require 'pp'    # Use pretty-print for debugging purposes
+require 'rspec' # Use the RSpec framework
+RSpec.configure do |config|
+  config.expect_with :rspec do |c|
+    # Disable the `should` syntax...
+    c.syntax = :expect
+  end
+  # Display stack trace in case of failure
+  config.full_backtrace = true
+end
+# End of file

data/examples/general/SRL/spec/tokenizer_spec.rb ADDED

@@ -0,0 +1,125 @@
+require_relative 'spec_helper' # Use the RSpec framework
+require_relative '../lib/grammar'
+require_relative '../lib/tokenizer' # Load the class under test
+module SRL
+  describe Tokenizer do
+    def match_expectations(aTokenizer, theExpectations)
+      aTokenizer.tokens.each_with_index do |token, i|
+        terminal, lexeme = theExpectations[i]
+        expect(token.terminal.name).to eq(terminal)
+        expect(token.lexeme).to eq(lexeme)
+      end
+    end
+    subject { Tokenizer.new('', SRL::Grammar) }
+    context 'Initialization:' do
+      it 'should be initialized with a text to tokenize and a grammar' do
+        expect { Tokenizer.new('anything', SRL::Grammar) }.not_to raise_error
+      end
+      it 'should have its scanner initialized' do
+        expect(subject.scanner).to be_kind_of(StringScanner)
+      end
+    end # context
+    context 'Single token recognition:' do
+      # it 'should tokenize delimiters and separators' do
+        # subject.scanner.string = ','
+        # token = subject.tokens.first
+        # expect(token).to be_kind_of(Rley::Lexical::Token)
+        # expect(token.terminal.name).to eq('COMMA')
+        # expect(token.lexeme).to eq(',')
+      # end
+      it 'should tokenize keywords' do
+        sample = 'between Exactly oncE optional TWICE'
+        subject.scanner.string = sample
+        subject.tokens.each do |tok|
+          expect(tok).to be_kind_of(Rley::Lexical::Token)
+          expect(tok.terminal.name).to eq(tok.lexeme.upcase)
+        end
+      end
+      it 'should tokenize integer values' do
+        subject.scanner.string = ' 123 '
+        token = subject.tokens.first
+        expect(token).to be_kind_of(Rley::Lexical::Token)
+        expect(token.terminal.name).to eq('INTEGER')
+        expect(token.lexeme).to eq('123')
+      end
+      it 'should tokenize single digits' do
+        subject.scanner.string = ' 1 '
+        token = subject.tokens.first
+        expect(token).to be_kind_of(Rley::Lexical::Token)
+        expect(token.terminal.name).to eq('DIGIT')
+        expect(token.lexeme).to eq('1')
+      end
+    end # context
+    context 'Quantifier tokenization:' do
+      it "should recognize 'exactly ... times'" do
+        input = 'exactly 4 Times'
+        subject.scanner.string = input
+        expectations = [
+          ['EXACTLY', 'exactly'],
+          ['DIGIT', '4'],
+          ['TIMES', 'Times']
+        ]
+        match_expectations(subject, expectations)
+      end
+      it "should recognize 'between ... and ... times'" do
+        input = 'Between 2 AND 4 times'
+        subject.scanner.string = input
+        expectations = [
+          ['BETWEEN', 'Between'],
+          ['DIGIT', '2'],
+          ['AND', 'AND'],
+          ['DIGIT', '4'],
+          ['TIMES', 'times']
+        ]
+        match_expectations(subject, expectations)
+      end
+      it "should recognize 'once or more'" do
+        input = 'Once or MORE'
+        subject.scanner.string = input
+        expectations = [
+          ['ONCE', 'Once'],
+          ['OR', 'or'],
+          ['MORE', 'MORE']
+        ]
+        match_expectations(subject, expectations)
+      end
+      it "should recognize 'never or more'" do
+        input = 'never or more'
+        subject.scanner.string = input
+        expectations = [
+          ['NEVER', 'never'],
+          ['OR', 'or'],
+          ['MORE', 'more']
+        ]
+        match_expectations(subject, expectations)
+      end
+      it "should recognize 'at least  ... times'" do
+        input = 'at least 10 times'
+        subject.scanner.string = input
+        expectations = [
+          ['AT', 'at'],
+          ['LEAST', 'least'],
+          ['INTEGER', '10'],
+          ['TIMES', 'times']
+        ]
+        match_expectations(subject, expectations)
+      end
+    end # context
+  end # describe
+end # module

data/examples/general/SRL/srl_demo.rb ADDED

@@ -0,0 +1,57 @@
+require_relative './lib/parser'
+def print_title(aTitle)
+  puts aTitle
+  puts '=' * aTitle.size
+end
+def print_tree(aTitle, aParseTree)
+  # Let's create a parse tree visitor
+  visitor = Rley::ParseTreeVisitor.new(aParseTree)
+  # Now output formatted parse tree
+  print_title(aTitle)
+  renderer = Rley::Formatter::Asciitree.new($stdout)
+  renderer.render(visitor)
+  puts ''
+end
+# Create an SRL parser object
+parser = SRL::Parser.new
+# Parse the input expression in command-line
+if ARGV.empty?
+  my_name = File.basename(__FILE__)
+  msg = <<-END_MSG
+WORK IN PROGRESS
+Simple Regex Language parser:
+- Parses a very limited subset of the language and displays the parse tree
+Command-line syntax:
+  ruby #{my_name} "quantifier expression"
+  where:
+    the SRL quantifier expression is enclosed between double quotes (")
+  Examples:
+  ruby #{my_name} "exactly 4 times"
+  ruby #{my_name} "between 2 and 3 times"
+END_MSG
+  puts msg
+  exit(1)
+end
+puts ARGV[0]
+result = parser.parse_SRL(ARGV[0])
+unless result.success?
+  # Stop if the parse failed...
+  puts "Parsing of '#{ARGV[0]}' failed"
+  puts "Reason: #{result.failure_reason.message}"
+  exit(1)
+end
+# Generate a concrete syntax parse tree from the parse result
+cst_ptree = result.parse_tree
+print_tree('Concrete Syntax Tree (CST)', cst_ptree)
+# End of file

data/examples/general/calc_iter1/calc_demo.rb CHANGED

@@ -5,7 +5,7 @@ require_relative 'calc_ast_builder'
 if ARGV.empty?
   my_name = File.basename(__FILE__)
   msg = <<-END_MSG
-Command-line symtax:
+Command-line syntax:
   ruby #{my_name} "arithmetic expression"
   where:
     the arithmetic expression is enclosed between double quotes (")

data/examples/general/calc_iter2/ast_building.rb ADDED

@@ -0,0 +1,20 @@
+# Mix-in module that provides convenience methods for
+# constructing an AST (Abstract Syntax Tree).
+module ASTBuilding
+  def return_first_child(_range, _tokens, theChildren)
+    return theChildren[0]
+  end
+  def return_second_child(_range, _tokens, theChildren)
+    return theChildren[1]
+  end
+  def return_last_child(_range, _tokens, theChildren)
+    return theChildren[-1]
+  end
+  def return_epsilon(_range, _tokens, _children)
+    return nil
+  end
+end # module
+# End of file

data/examples/general/calc_iter2/calc_ast_builder.rb CHANGED

@@ -1,3 +1,4 @@
+require_relative 'ast_building'
 require_relative 'calc_ast_nodes'
 # The purpose of a CalcASTBuilder is to build piece by piece an AST
@@ -8,6 +9,8 @@ require_relative 'calc_ast_nodes'
 # (say, a parse tree) from simpler objects (terminal and non-terminal
 # nodes) and using a step by step approach.
 class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
+  include ASTBuilding
   Terminal2NodeClass = {
     # Lexical ambiguity: minus sign represents two very different concepts:
     # The unary negation operator on one hand, the binary subtraction operator
@@ -23,29 +26,6 @@ class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
   protected
-  def return_first_child(_range, _tokens, theChildren)
-    return theChildren[0]
-  end
-  def return_second_child(_range, _tokens, theChildren)
-    return theChildren[1]
-  end
-  def return_last_child(_range, _tokens, theChildren)
-    return theChildren[-1]
-  end
-  def return_epsilon(_range, _tokens, _children)
-    return nil
-  end
-  # Overriding method.
-  # Create a parse tree object with given
-  # node as root node.
-  def create_tree(aRootNode)
-    return Rley::PTree::ParseTree.new(aRootNode)
-  end
   # Overriding method.
   # Factory method for creating a node object for the given
   # input token.

data/examples/general/calc_iter2/calc_demo.rb CHANGED

@@ -28,7 +28,7 @@ Demo calculator that prints:
 - The Concrete and Abstract Syntax Trees of the math expression.
 - The result of the math expression.
-Command-line symtax:
+Command-line syntax:
   ruby #{my_name} "arithmetic expression"
   where:
     the arithmetic expression is enclosed between double quotes (")

data/lib/rley/base/base_parser.rb CHANGED

@@ -17,7 +17,7 @@ module Rley # This module is used as a namespace
       attr_reader(:dotted_items)
       # Constructor.
-      # @param [Syntax::Grammar] The grammar of the language.
+      # @param aGrammar [Syntax::Grammar] The grammar of the language.
       def initialize(aGrammar)
         @grammar = aGrammar
         @dotted_items = build_dotted_items(grammar) # Method from mixin

data/lib/rley/base/grm_items_builder.rb CHANGED

@@ -5,7 +5,7 @@ module Rley # This module is used as a namespace
     # Mix-in module. Builds the dotted items for a given grammar
     module GrmItemsBuilder
       # Build an array of dotted items from the productions of passed grammar.
-      # @param [Syntax::Grammar]
+      # @param aGrammar [Syntax::Grammar]
       # @return [Array<DottedItem>]
       def build_dotted_items(aGrammar)
         items = []

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.5.07'.freeze
+  Version = '0.5.08'.freeze
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm".freeze

data/lib/rley/gfg/non_terminal_vertex.rb CHANGED

@@ -14,7 +14,7 @@ module Rley # This module is used as a namespace
       attr_reader :non_terminal
       # Constructor to specialize in subclasses.
-      # @param [Syntax::NonTerminal]
+      # @param aNonTerminal [Syntax::NonTerminal]
       def initialize(aNonTerminal)
         super()
         @non_terminal = aNonTerminal

data/lib/rley/parser/gfg_chart.rb CHANGED

@@ -9,26 +9,29 @@ module Rley # This module is used as a namespace
     # Assuming that n == number of input tokens,
     # the chart is an array with n + 1 entry sets.
     class GFGChart
-      # An array of entry sets (one per input token + 1)
+      # @return [Array<ParseEntrySet>] entry sets (one per input token + 1)
       attr_reader(:sets)
       # @param tokenCount [Integer] The number of lexemes in the input to parse.
+      # @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
       def initialize(tokenCount, aGFGraph)
         @sets = Array.new(tokenCount + 1) { |_| ParseEntrySet.new }
         push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
       end
-      # Return the start (non-terminal) symbol of the grammar.
+      # @return [Syntax::NonTerminal] the start symbol of the grammar.
       def start_symbol()
         return sets.first.entries[0].vertex.non_terminal
       end
-      # Access the entry set at given position
+      # @param index [Integer]
+      # @return [ParseEntrySet] Access the entry set at given position
       def [](index)
         return sets[index]
       end
       # Return the index value of the last non-empty entry set.
+      # @return [Integer]
       def last_index()
         first_empty = sets.find_index(&:empty?)
         index = if first_empty.nil?
@@ -49,11 +52,13 @@ module Rley # This module is used as a namespace
       end
       # Retrieve the first parse entry added to this chart
+      # @return [ParseEntry]
       def initial_entry()
         return sets[0].first
       end
       # Retrieve the entry that corresponds to a complete and successful parse
+      # @return [ParseEntry]
       def accepting_entry()
         # Success can be detected as follows:
         # The last chart entry set has at least one complete parse entry

data/lib/rley/parser/gfg_earley_parser.rb CHANGED

@@ -6,9 +6,12 @@ module Rley # This module is used as a namespace
   module Parser # This module is used as a namespace
     # Implementation of a parser that uses the Earley parsing algorithm.
     class GFGEarleyParser < Base::BaseParser
-      # The Grammar Flow graph for the given grammar
+      # The Grammar Flow graph generated from the provided grammar.
+      # @return [GFG::GrmFlowGraph] The GFG that drives the parsing
       attr_reader :gf_graph
+      # Constructor.
+      # @param aGrammar [Syntax::Grammar] The grammar of the language to parse.
       def initialize(aGrammar)
         super(aGrammar)
         @gf_graph = GFG::GrmFlowGraph.new(dotted_items)
@@ -17,7 +20,7 @@ module Rley # This module is used as a namespace
       # Parse a sequence of input tokens.
       # @param aTokenSequence [Array] Array of Tokens objects returned by a
       # tokenizer/scanner/lexer.
-      # @return [Parsing] an object that embeds the parse results.
+      # @return [GFGParsing] an object that embeds the parse results.
       def parse(aTokenSequence)
         result = GFGParsing.new(gf_graph, aTokenSequence)
         last_token_index = aTokenSequence.size

data/lib/rley/parser/gfg_parsing.rb CHANGED

@@ -9,12 +9,15 @@ module Rley # This module is used as a namespace
   module Parser # This module is used as a namespace
     class GFGParsing
       # The link to the grammar flow graph
+      # @return [GFG::GrmFlowGraph] The GFG that drives the parsing
       attr_reader(:gf_graph)
       # The link to the chart object
+      # @return [GFGChart]
       attr_reader(:chart)
       # The sequence of input tokens to parse
+      # @return [Array<Lexical::Token>]
       attr_reader(:tokens)
       # A Hash with pairs of the form:
@@ -22,13 +25,14 @@ module Rley # This module is used as a namespace
       # It associates to every parse entry its antecedent(s), that is,
       # the parse entry/ies that cause the key parse entry to be created
       # with one of the gfg rules
+      # @return [Hash{ParseEntry => Array<ParseEntry>}]
       attr_reader(:antecedence)
       # The reason of a parse failure
       attr_reader(:failure_reason)
       # Constructor
-      # @param theGFG [GrmFlowGraph] the Grammar Flow Graph
+      # @param theGFG [GFG::GrmFlowGraph] the Grammar Flow Graph
       # @param theTokens [Array<Token>] the array of input tokens
       def initialize(theGFG, theTokens)
         @gf_graph = theGFG

data/lib/rley/parser/parse_tree_builder.rb CHANGED

@@ -12,6 +12,9 @@ module Rley # This module is used as a namespace
   module Parser # This module is used as a namespace
     # Structure used internally by ParseTreeBuilder class.
     CSTRawNode = Struct.new(:range, :symbol, :children) do
+      # Constructor.
+      # @param aRange [Lexical::TokenRange] The token position range.
+      # @param aSymbol [Syntax::Symbol] A symbol from grammar.
       def initialize(aRange, aSymbol)
         super
         self.range = aRange
@@ -41,12 +44,13 @@ module Rley # This module is used as a namespace
       def initialize(theTokens)
         @tokens = theTokens
         @stack = []
+        @dummy_node = Object.new.freeze
       end
       # Receive events resulting from a visit of GFGParsing object.
       # These events are produced by a specialized Enumerator created
       # with a ParseWalkerFactory instance.
-      # @param anEvent [Symbol] Kind of visit event. Should be: :visit
+      # @param anEvent [Syntax::Symbol] Kind of visit event. Should be: :visit
       # @param anEntry [ParseEntry] The entry being visited
       # @param anIndex [Integer] The token index associated with anEntry
       def receive_event(anEvent, anEntry, anIndex)
@@ -71,6 +75,13 @@ module Rley # This module is used as a namespace
         return @stack
       end
+      # Overriding method.
+      # Create a parse tree object with given
+      # node as root node.
+      def create_tree(aRootNode)
+        return Rley::PTree::ParseTree.new(aRootNode)
+      end
       private
       # Return the top of stack element.
@@ -173,7 +184,7 @@ module Rley # This module is used as a namespace
       # Initialize children array of TOS with nil placeholders.
       # The number of elements equals the number of symbols at rhs.
       def init_TOS_children(aCount)
-        tos.children = Array.new(aCount)
+        tos.children = Array.new(aCount) { |_index| @dummy_node }
       end
       # Does the position on the left side of the dot correspond
@@ -213,15 +224,15 @@ module Rley # This module is used as a namespace
       # array at that position.
       # If the position is nil, then the node will be placed at the position of
       # the rightmost nil element in children array.
-      def place_TOS_child(aNode, aRHSPos)
+      def place_TOS_child(aNode, aRHSPos)
         if aRHSPos.nil?
           # Retrieve index of the rightmost nil child...
-          pos = tos.children.rindex(&:nil?)
+          pos = tos.children.rindex { |child| child == @dummy_node }
           raise StandardError, 'Internal error' if pos.nil?
         else
           pos = aRHSPos
         end
         tos.children[pos] = aNode
       end