RubyGems - rley - Versions diffs - 0.1.11 → 0.1.12 - Mend

rley 0.1.11 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +8 -8
data/CHANGELOG.md +5 -0
data/examples/parsers/parsing_b_expr.rb +85 -0
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/parse_state.rb +5 -0
data/lib/rley/parser/parsing.rb +23 -7
data/lib/rley/parser/state_set.rb +7 -5
data/lib/rley/ptree/parse_tree.rb +12 -1
data/spec/rley/parser/parsing_spec.rb +25 -7
data/spec/rley/ptree/parse_tree_spec.rb +3 -1
data/spec/rley/support/grammar_b_expr_helper.rb +39 -0
metadata +4 -2

checksums.yaml CHANGED

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    MDNkYmEzYTFiNmUyMzk2MTgwOTczZmNlMTVjMDU3ZDIzNjI4YWNmYw==
+    NTc2NGE4MzYxOTc1ZDUyMDVkYjdmNGFhODllNmEwM2YxMjVkZDk1OQ==
   data.tar.gz: !binary |-
-    ZWQ1MmMzMDA2NzcxOTUzM2ZjMDg0Yjg2OTg0MTVhODgzOTQ1OTExNg==
+    ZDBkMTdmZWM2NTMwYWMwNDFkNDQ4NGI2YzdkNjk3NDU0ZGExMGYzNA==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    ZjhmMDlmZjk2ZDYwY2EwNDBjZjIxYWMyMDA1ZjQ4MTVmOWE0NjRhZmI4OTg1
-    MWZiMzQwNzFkOGIyMzJmZTdmMTg2NTRkOGVmM2ViNjg0MzU1MjllODJkZTIx
-    NzQ4NDA3NmUwYzY4YzBhMmYwODg1MGIzNGJmMDU3NDBjMzJmZDY=
+    YWZjYmQxNDNiNjVmMDYyYWI1YzM0YzMyN2VjMzk4ZjUxOTIyMmQxNTE4Y2Y4
+    YjM4NTAyNGNhNjhiOWRhNWMyZWVmYzRhYjFjNzhhOWEzMDY1ZTgzMzRiMTVh
+    Njg1MDRjMjQ5NTlhYmU3NTk2MDBiMWQyZmI3MDIyMWUwNGM1NjM=
   data.tar.gz: !binary |-
-    OTBhM2JiNzRiNzJlNmNmZWRjODJhMzY3MzkyMDdkNDRmNzcyYzFiMGNmYThm
-    YWVhMTBjMzkzMDI1NzMyNTNiOTkzZGFlODAxYzcyYWQ0Y2QyNGEwZWUxYjBj
-    YTk0OTViZWVkODk5N2U4MmUxZWJlZTBjY2QxOTNmMDk5YmIxZjU=
+    ODZiMmY1ZDMwNTVlNmM3ZGJmZDIzOTAzYjQ1MTFlMmY4OTBlODhhZWZhN2M3
+    MjEzYjI0YjZhNTA4NzkzYjJiMzMwY2Y0NzliMGMyZDdlMjI3NjkzMTliYWNh
+    YzQ2MGVmNWM1NTA5M2IxYjcxNzliNzhhMzViMDE4OTM3NWY1NjI=

data/CHANGELOG.md CHANGED

@@ -1,3 +1,8 @@
+### 0.1.12 / 2014-12-22
+* [FIX] Fixed `Parsing#parse_tree`: the code couldn't cope with a parse state set containing
+  more than one parse state that expected the same symbol.
+* [NEW] Added one more parser example (for very basic arithmetic expressions)
 ### 0.1.11 / 2014-12-16
 * [FIX] Fixed all but one YARD (documentation) warnings. Most of them were due to mismatch
 in method argument names between source code and documentation.

data/examples/parsers/parsing_b_expr.rb ADDED

@@ -0,0 +1,85 @@
+# Purpose: to demonstrate how to parse basic arithmetic expressions
+# and render a parse tree
+require 'pp' # TODO remove this dependency
+require 'rley'  # Load the gem
+# Steps to render a parse tree (of a valid parsed input):
+# 1. Define a grammar
+# 2. Create a tokenizer for the language
+# 3. Create a parser for that grammar
+# 4. Tokenize the input
+# 5. Let the parser process the input
+# 6. Generate a parse tree from the parse result
+# 7. Render the parse tree (in JSON)
+########################################
+# Step 1. Define a grammar for a very simple arithmetic expression language
+# (based on example in article on Earley's algorithm in Wikipedia)
+# Let's create the grammar piece by piece
+builder = Rley::Syntax::GrammarBuilder.new
+builder.add_terminals('+', '*', 'integer')
+builder.add_production('P' => 'S')
+builder.add_production('S' => %w(S + M))
+builder.add_production('S' => 'M')
+builder.add_production('M' => %w(M * T))
+builder.add_production('M' => 'T')
+builder.add_production('T' => 'integer')
+# And now build the grammar...
+grammar_s_expr = builder.grammar
+########################################
+# Step 2. Create a tokenizer for the language
+# The tokenizer transforms the input into an array of tokens
+def tokenizer(aText, aGrammar)
+  tokens = aText.scan(/\S+/).map do |lexeme|
+    case lexeme
+      when '+', '*'
+        terminal = aGrammar.name2symbol[lexeme]
+      when /^[-+]?\d+$/
+        terminal = aGrammar.name2symbol['integer']
+      else
+        msg = "Unknown input text '#{lexeme}'"
+        fail StandardError, msg
+    end
+    Rley::Parser::Token.new(lexeme, terminal)
+  end
+  return tokens
+end
+########################################
+# Step 3. Create a parser for that grammar
+parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
+########################################
+# Step 4. Tokenize the input
+valid_input = '2 + 3 * 4'
+tokens = tokenizer(valid_input, grammar_s_expr)
+########################################
+# Step 5. Let the parser process the input
+result = parser.parse(tokens)
+puts "Parse successful? #{result.success?}"
+pp result
+########################################
+# Step 6. Generate a parse tree from the parse result
+ptree = result.parse_tree
+=begin
+########################################
+# Step 7. Render the parse tree (in JSON)
+# Let's create a parse tree visitor
+visitor = Rley::ParseTreeVisitor.new(ptree)
+# Here we create a renderer object...
+renderer = Rley::Formatter::Json.new(STDOUT)
+# Now emit the parse tree as JSON on the console output
+puts "JSON rendering of the parse tree for '#{valid_input}' input:"
+renderer.render(visitor)
+=end
+# End of file

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.1.11'
+  Version = '0.1.12'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/parser/parse_state.rb CHANGED

@@ -31,6 +31,11 @@ module Rley # This module is used as a namespace
         return dotted_rule.reduce_item?
       end
+      # Returns true if the dot is at the start of the rhs of the production.
+      def predicted?()
+        return dotted_rule.predicted_item?
+      end
       # Next expected symbol in the production
       def next_symbol()
         return dotted_rule.next_symbol

data/lib/rley/parser/parsing.rb CHANGED

@@ -1,6 +1,7 @@
 require_relative 'chart'
 require_relative '../ptree/parse_tree'
 module Rley # This module is used as a namespace
   module Parser # This module is used as a namespace
     class Parsing
@@ -47,19 +48,23 @@ module Rley # This module is used as a namespace
                 parse_state)
             when Syntax::NonTerminal
-              # Retrieve complete states
+              # Retrieve complete states with curr_symbol as lhs
               new_states = chart[state_set_index].states_rewriting(curr_symbol)
               # TODO: make this more robust
               parse_state = new_states[0]
               curr_dotted_item = parse_state.dotted_rule
+              # Additional check
+              if ptree.current_node.symbol != curr_dotted_item.production.lhs
+                ptree.step_back(state_set_index)
+              end
               ptree.current_node.range = { low: parse_state.origin }
               node_range =  ptree.current_node.range
               ptree.add_children(curr_dotted_item.production, node_range)
               link_node_to_token(ptree, state_set_index - 1)
-            when NilClass
+            when NilClass # No symbol on the left of dot
               lhs = curr_dotted_item.production.lhs
-              new_states = chart[state_set_index].states_expecting(lhs)
+              new_states = states_expecting(lhs, state_set_index, true)
               break if new_states.empty?
               # TODO: make this more robust
               parse_state = new_states[0]
@@ -98,7 +103,7 @@ module Rley # This module is used as a namespace
         curr_token = tokens[aPosition]
         return unless curr_token.terminal == aTerminal
-        states = states_expecting(aTerminal, aPosition)
+        states = states_expecting(aTerminal, aPosition, false)
         states.each do |s|
           next_item = nextMapping.call(s.dotted_rule)
           push_state(next_item, s.origin, aPosition + 1)
@@ -119,7 +124,7 @@ module Rley # This module is used as a namespace
       def completion(aState, aPosition, &nextMapping)
         curr_origin = aState.origin
         curr_lhs = aState.dotted_rule.lhs
-        states = states_expecting(curr_lhs, curr_origin)
+        states = states_expecting(curr_lhs, curr_origin, false)
         states.each do |s|
           next_item = nextMapping.call(s.dotted_rule)
           push_state(next_item, s.origin, aPosition)
@@ -129,8 +134,19 @@ module Rley # This module is used as a namespace
       # The list of ParseState from the chart entry at given position
       # that expect the given terminal
-      def states_expecting(aTerminal, aPosition)
-        return chart[aPosition].states_expecting(aTerminal)
+      def states_expecting(aTerminal, aPosition, toSort)
+        expecting = chart[aPosition].states_expecting(aTerminal)
+        return expecting if !toSort || expecting.size < 2
+        # Put predicted states ahead
+        (predicted, others) = expecting.partition { |state| state.predicted? }
+        # Sort each group of states in descending order of their origin value
+        [predicted, others].each do |set|
+          set.sort! { |a,b| b.origin <=> a.origin }
+        end
+        return predicted + others
       end
       private

data/lib/rley/parser/state_set.rb CHANGED

@@ -21,13 +21,15 @@ module Rley # This module is used as a namespace
         @states << aState unless include?(aState)
       end
-      # The list of ParseState that expect the given terminal
-      def states_expecting(aTerminal)
-        return states.select { |s| s.dotted_rule.next_symbol == aTerminal }
+      # The list of ParseState that expect the given symbol.
+      # @param aSymbol [GrmSymbol] the expected symbol
+      #   (i.e. the symbol on the right of the dot)
+      def states_expecting(aSymbol)
+        return states.select { |s| s.dotted_rule.next_symbol == aSymbol }
       end
-      # The list of complete ParseState that have the symbol as the lhs of their
-      # production
+      # The list of complete ParseState that have the given non-terminal
+      # symbol as the lhs of their production.
       def states_rewriting(aNonTerm)
         return states.select do |s|
           (s.dotted_rule.production.lhs == aNonTerm) && s.complete?

data/lib/rley/ptree/parse_tree.rb CHANGED

@@ -33,8 +33,19 @@ module Rley # This module is used as a namespace
         aVisitor.end_visit_ptree(self)
       end
+      # Add children to the current node.
+      # The children nodes correspond to the rhs of the production.
+      # Update the range in the children given the passed range object.
+      # Pre-condition: the current node refers to the same (non-terminal)
+      # symbol as the lhs of the given production.
+      # @param aProduction [Production] A production rule
+      # @param aRange [TokenRange] The range used to update the children nodes
       def add_children(aProduction, aRange)
+        if aProduction.lhs != current_node.symbol
+          msg = "Internal error. Expected symbol was #{aProduction.lhs} but current node is #{current_node.symbol}"
+          fail StandardError, msg
+        end
         aProduction.rhs.each do |symb|
           case symb
             when Syntax::Terminal

data/spec/rley/parser/parsing_spec.rb CHANGED

@@ -7,12 +7,19 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
 require_relative '../../../lib/rley/parser/dotted_item'
 require_relative '../../../lib/rley/parser/token'
 require_relative '../../../lib/rley/parser/earley_parser'
+require_relative '../support/grammar_abc_helper'
+require_relative '../support/grammar_b_expr_helper'
 # Load the class under test
 require_relative '../../../lib/rley/parser/parsing'
 module Rley # Open this namespace to avoid module qualifier prefixes
   module Parser # Open this namespace to avoid module qualifier prefixes
     describe Parsing do
+      include GrammarABCHelper  # Mix-in module with builder for grammar abc
+      include GrammarBExprHelper # Mix-in with builder for simple expressions
       # Grammar 1: A very simple language
       # S ::= A.
       # A ::= "a" A "c".
@@ -87,7 +94,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           item2 = DottedItem.new(prod_A1, 1)
           subject.push_state(item1, 2, 2)
           subject.push_state(item2, 2, 2)
-          states = subject.states_expecting(c_, 2)
+          states = subject.states_expecting(c_, 2, false)
           expect(states.size).to eq(1)
           expect(states[0].dotted_rule).to eq(item1)
         end
@@ -114,11 +121,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       context 'Parse tree building:' do
         let(:sample_grammar1) do
-          builder = Syntax::GrammarBuilder.new
-          builder.add_terminals('a', 'b', 'c')
-          builder.add_production('S' => ['A'])
-          builder.add_production('A' => %w(a A c))
-          builder.add_production('A' => ['b'])
+          builder = grammar_abc_builder
           builder.grammar
         end
@@ -128,13 +131,28 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           end
         end
+        let(:b_expr_grammar) do
+          builder = grammar_expr_builder
+          builder.grammar
+        end
-        it 'should build the parse tree for a non-ambiguous grammar' do
+        it 'should build the parse tree for a simple non-ambiguous grammar' do
           parser = EarleyParser.new(sample_grammar1)
           instance = parser.parse(token_seq1)
           ptree = instance.parse_tree
           expect(ptree).to be_kind_of(PTree::ParseTree)
         end
+        it 'should build the parse tree for a simple expression grammar' do
+          parser = EarleyParser.new(b_expr_grammar)
+          tokens = expr_tokenizer('2 + 3 * 4', b_expr_grammar)
+          instance = parser.parse(tokens)
+          ptree = instance.parse_tree
+          expect(ptree).to be_kind_of(PTree::ParseTree)
+        end
       end # context
     end # describe
   end # module

data/spec/rley/ptree/parse_tree_spec.rb CHANGED

@@ -2,6 +2,7 @@ require_relative '../../spec_helper'
 require_relative '../support/grammar_abc_helper'
 # Load the class under test
 require_relative '../../../lib/rley/ptree/parse_tree'
@@ -9,7 +10,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
   module PTree # Open this namespace to avoid module qualifier prefixes
     describe ParseTree do
       include GrammarABCHelper  # Mix-in module with builder for grammar abc
       let(:sample_grammar) do
         builder = grammar_abc_builder
         builder.grammar

data/spec/rley/support/grammar_b_expr_helper.rb ADDED

@@ -0,0 +1,39 @@
+# Load the builder class
+require_relative '../../../lib/rley/syntax/grammar_builder'
+require_relative '../../../lib/rley/parser/token'
+module GrammarBExprHelper
+  # Factory method. Creates a grammar builder for a basic arithmetic
+  # expression grammar.
+  # (based on example in article on Earley's algorithm in Wikipedia)
+  def grammar_expr_builder()
+    builder = Rley::Syntax::GrammarBuilder.new
+    builder.add_terminals('+', '*', 'integer')
+    builder.add_production('P' => 'S')
+    builder.add_production('S' => %w(S + M))
+    builder.add_production('S' => 'M')
+    builder.add_production('M' => %w(M * T))
+    builder.add_production('M' => 'T')
+    builder.add_production('T' => 'integer')
+    builder
+  end
+  # Basic expression tokenizer
+  def expr_tokenizer(aText, aGrammar)
+  tokens = aText.scan(/\S+/).map do |lexeme|
+    case lexeme
+      when '+', '*'
+        terminal = aGrammar.name2symbol[lexeme]
+      when /^[-+]?\d+$/
+        terminal = aGrammar.name2symbol['integer']
+      else
+        msg = "Unknown input text '#{lexeme}'"
+        fail StandardError, msg
+    end
+    Rley::Parser::Token.new(lexeme, terminal)
+  end
+  return tokens
+end
+end # module

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.1.11
+  version: 0.1.12
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-12-16 00:00:00.000000000 Z
+date: 2014-12-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -102,6 +102,7 @@ files:
 - examples/grammars/grammar_abc.rb
 - examples/grammars/grammar_L0.rb
 - examples/parsers/parsing_abc.rb
+- examples/parsers/parsing_b_expr.rb
 - examples/recognizers/recognizer_abc.rb
 - lib/rley.rb
 - lib/rley/constants.rb
@@ -145,6 +146,7 @@ files:
 - spec/rley/ptree/parse_tree_spec.rb
 - spec/rley/ptree/token_range_spec.rb
 - spec/rley/support/grammar_abc_helper.rb
+- spec/rley/support/grammar_b_expr_helper.rb
 - spec/rley/syntax/grammar_builder_spec.rb
 - spec/rley/syntax/grammar_spec.rb
 - spec/rley/syntax/grm_symbol_spec.rb