RubyGems - rley - Versions diffs - 0.3.09 → 0.3.10 - Mend

rley 0.3.09 → 0.3.10

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (11)

checksums.yaml +4 -4
data/CHANGELOG.md +9 -0
data/README.md +16 -19
data/examples/NLP/mini_en_demo.rb +15 -15
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/parse_forest_builder.rb +21 -20
data/lib/rley/sppf/parse_forest.rb +12 -0
data/lib/rley/syntax/grammar_builder.rb +6 -1
data/spec/rley/parser/parse_forest_factory_spec.rb +1 -1
data/spec/rley/syntax/grammar_builder_spec.rb +30 -9
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d22d11b51c4c72d3d8230c775b797afae04c4e8f
-  data.tar.gz: e87ba3a3beeadd40a4447281c904f8dba4c8dc57
+  metadata.gz: 20ceec50ffa7359dbf635a00184c604654cdf9bb
+  data.tar.gz: 76e9432f62d88127257c66277e21be37fd1b00bd
 SHA512:
-  metadata.gz: 446344010aafea29682d90bd71bdb73f1f189e694fb91907136e00cce015b098e32fb2679af3a0b6ae772d41892478b0a9270c43ebe77cee2c47cb319b7fd0aa
-  data.tar.gz: 9509ea071e10c002e70af379089565b3b28269fb09c1f1d04ee9a04ec78dab7ac916e4f90602bbef5bc3eff8ea0952143faa5ec1d6c54277b60e7db810dab45d
+  metadata.gz: a80c8e38ede7dd3908a52b73fd8fa13fc7087537d8272c8d648764902cb29f0d5db23c157a2b704eb66e2c8322d7e2ee4822dfd36fadb25ac22e6ec9f517f3eb
+  data.tar.gz: 9d23a806c510790c6fcc19566520e11c9292d642b2debe20784c4a884d646c173a789ae3060345ad659698554fce6748078ef7d5cc2353f7e1c3495ecd156104

data/CHANGELOG.md CHANGED

@@ -1,3 +1,12 @@
+### 0.3.10 / 2016-12-04
+* [NEW] Method `ParseForest#ambiguous?`. Indicates whether the parse is ambiguous.
+* [CHANGE] File `README.md` updated with new grammar builder syntax & typo fixes.
+* [CHANGE] Method `GrammarBuilder#initialize`: Accepts a block argument that allows lighter construction.
+### 0.3.09 / 2016-11-27
+* [CHANGE] File `README.md` fully rewritten and added an example.
+* [CHANGE] Directory `examples` completely reorganized.
 ### 0.3.09 / 2016-11-27
 * [CHANGE] File `README.md` fully rewritten and added an example.
 * [CHANGE] Directory `examples` completely reorganized.

data/README.md CHANGED

@@ -9,7 +9,7 @@
 [Rley](https://github.com/famished-tiger/Rley)
 ======
-A Ruby library for constructing general parsers for _any_ context-free languages.
+A Ruby library for constructing general parsers for _any_ context-free language.
 What is Rley?
@@ -69,7 +69,7 @@ The tour is organized into the following steps:
 1. [Defining the language grammar](#defining-the-language-grammar)
 2. [Creating a lexicon](#creating-a-lexicon)
 3. [Creating a tokenizer](#creating-a-tokenizer)
-4. [Building the parser](building-the-parser)
+4. [Building the parser](#building-the-parser)
 5. [Parsing some input](#parsing-some-input)
 6. [Generating the parse forest](#generating-the-parse-forest)
@@ -84,21 +84,20 @@ The subset of English grammar is based on an example from the NLTK book.
     require 'rley'  # Load Rley library
     # Instantiate a builder object that will build the grammar for us
-    builder = Rley::Syntax::GrammarBuilder.new
-    # Next 2 lines we define the terminal symbols (=word categories in the lexicon)
-    builder.add_terminals('Noun', 'Proper-Noun', 'Verb')
-    builder.add_terminals('Determiner', 'Preposition')
-    # Here we define the productions (= grammar rules)
-    builder.add_production('S' => %w[NP VP])
-    builder.add_production('NP' => 'Proper-Noun')
-    builder.add_production('NP' => %w[Determiner Noun])
-    builder.add_production('NP' => %w[Determiner Noun PP])
-    builder.add_production('VP' => %w[Verb NP])
-    builder.add_production('VP' => %w[Verb NP PP])
-    builder.add_production('PP' => %w[Preposition NP])
+    builder = Rley::Syntax::GrammarBuilder.new do
+      # Terminal symbols (= word categories in lexicon)
+      add_terminals('Noun', 'Proper-Noun', 'Verb')
+      add_terminals('Determiner', 'Preposition')
+      # Here we define the productions (= grammar rules)
+      rule 'S' => %w[NP VP]
+      rule 'NP' => 'Proper-Noun'
+      rule 'NP' => %w[Determiner Noun]
+      rule 'NP' => %w[Determiner Noun PP]
+      rule 'VP' => %w[Verb NP]
+      rule 'VP' => %w[Verb NP PP]
+      rule 'PP' => %w[Preposition NP]
+    end
     # And now, let's build the grammar...
     grammar = builder.grammar
 ```
@@ -208,8 +207,6 @@ Here are a few other ones:
 ##  Thanks to:
 * Professor Keshav Pingali, one of the creators of the Grammar Flow Graph parsing approach for his encouraging e-mail exchanges.
----
 Copyright
 ---------
 Copyright (c) 2014-2016, Dimitri Geshef.

data/examples/NLP/mini_en_demo.rb CHANGED

@@ -9,20 +9,20 @@ require 'rley'  # Load Rley library
 # with a very simplified syntax.
 # Instantiate a builder object that will build the grammar for us
-builder = Rley::Syntax::GrammarBuilder.new
+builder = Rley::Syntax::GrammarBuilder.new do
+  # Next 2 lines we define the terminal symbols (=word categories in the lexicon)
+  add_terminals('Noun', 'Proper-Noun', 'Verb')
+  add_terminals('Determiner', 'Preposition')
-# Next 2 lines we define the terminal symbols (=word categories in the lexicon)
-builder.add_terminals('Noun', 'Proper-Noun', 'Verb')
-builder.add_terminals('Determiner', 'Preposition')
-# Here we define the productions (= grammar rules)
-builder.add_production('S' => %w[NP VP])
-builder.add_production('NP' => 'Proper-Noun')
-builder.add_production('NP' => %w[Determiner Noun])
-builder.add_production('NP' => %w[Determiner Noun PP])
-builder.add_production('VP' => %w[Verb NP])
-builder.add_production('VP' => %w[Verb NP PP])
-builder.add_production('PP' => %w[Preposition NP])
+  # Here we define the productions (= grammar rules)
+  rule 'S' => %w[NP VP]
+  rule 'NP' => 'Proper-Noun'
+  rule 'NP' => %w[Determiner Noun]
+  rule 'NP' => %w[Determiner Noun PP]
+  rule 'VP' => %w[Verb NP]
+  rule 'VP' => %w[Verb NP PP]
+  rule 'PP' => %w[Preposition NP]
+end
 # And now, let's build the grammar...
 grammar = builder.grammar
@@ -70,8 +70,6 @@ def tokenizer(aTextToParse, aGrammar)
   return tokens
 end
-More realistic NLP will will most probably
 ########################################
 # Step 4. Create a parser for that grammar
 # Easy with Rley...
@@ -80,6 +78,7 @@ parser = Rley::Parser::GFGEarleyParser.new(grammar)
 ########################################
 # Step 5. Parsing the input
 input_to_parse = 'John saw Mary with a telescope'
+# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
 # Convert input text into a sequence of token objects...
 tokens = tokenizer(input_to_parse, grammar)
 result = parser.parse(tokens)
@@ -90,3 +89,4 @@ puts "Parsing successful? #{result.success?}" # => Parsing successful? true
 # Step 6. Generating the parse forest
 pforest = result.parse_forest
+puts "Parsing ambiguous? #{pforest.ambiguous?}" # => Parsing ambiguous? false

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.3.09'.freeze
+  Version = '0.3.10'.freeze
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm".freeze

data/lib/rley/parser/parse_forest_builder.rb CHANGED

@@ -29,7 +29,7 @@ module Rley # This module is used as a namespace
       # A hash with pairs of the form: visited parse entry => forest node
       attr_reader(:entry2node)
-      # A hash with pairs of the form:
+      # A hash with pairs of the form:
       # parent end entry => path to alternative node
       # This is needed for synchronizing backtracking
       attr_reader(:entry2path_to_alt)
@@ -89,12 +89,12 @@ module Rley # This module is used as a namespace
           when :revisit
-            # Retrieve the already existing node corresponding
+            # Retrieve the already existing node corresponding
             # to re-visited entry
             popular = @entry2node[anEntry]
             # Share with parent (if needed)...
-            children = curr_parent.subnodes
+            children = curr_parent.subnodes
             curr_parent.add_subnode(popular) unless children.include? popular
           else
@@ -105,7 +105,7 @@ module Rley # This module is used as a namespace
       def process_item_entry(anEvent, anEntry, anIndex)
         case anEvent
-          when :visit
+          when :visit
             if anEntry.exit_entry?
               # Previous entry was an end entry (X. pattern)
               # Does the previous entry have multiple antecedent?
@@ -119,7 +119,7 @@ module Rley # This module is used as a namespace
                 create_alternative_node(anEntry)
               end
             end
             # Does this entry have multiple antecedent?
             if anEntry.antecedents.size > 1
               # Store current path for later backtracking
@@ -129,7 +129,7 @@ module Rley # This module is used as a namespace
               # curr_parent.refinement = :or
               create_alternative_node(anEntry)
-            end
+            end
             # Retrieve the grammar symbol before the dot (if any)
             prev_symbol = anEntry.prev_symbol
@@ -146,7 +146,7 @@ module Rley # This module is used as a namespace
                 end
                 curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
             end
           when :backtrack
             # Restore path
             @curr_path = entry2path_to_alt[anEntry].dup
@@ -154,9 +154,9 @@ module Rley # This module is used as a namespace
             antecedent_index = curr_parent.subnodes.size
             # puts "Current parent #{curr_parent.to_string(0)}"
             # puts "Antecedent index #{antecedent_index}"
-            create_alternative_node(anEntry)
+            create_alternative_node(anEntry)
         when :revisit
             # Retrieve the grammar symbol before the dot (if any)
             prev_symbol = anEntry.prev_symbol
@@ -172,7 +172,7 @@ module Rley # This module is used as a namespace
                   create_epsilon_node(anEntry, anIndex)
                 end
                 curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
-            end
+            end
         end
       end
@@ -187,8 +187,8 @@ module Rley # This module is used as a namespace
         non_terminal = nonTSymb.nil? ? anEntry.vertex.non_terminal : nonTSymb
         new_node = Rley::SPPF::NonTerminalNode.new(non_terminal, aRange)
         entry2node[anEntry] = new_node
-        # puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
         add_subnode(new_node)
+        # puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
         return new_node
       end
@@ -200,6 +200,7 @@ module Rley # This module is used as a namespace
         range = curr_parent.range
         alternative = Rley::SPPF::AlternativeNode.new(vertex, range)
         add_subnode(alternative)
+        forest.is_ambiguous = true
         # puts "FOREST ADD #{alternative.key}"
         return alternative
@@ -231,13 +232,13 @@ module Rley # This module is used as a namespace
       # Add the given node if not yet present in parse forest
       def add_node_to_forest(aNode)
         key_node = aNode.key
-        if forest.include?(key_node)
-          new_node = forest.key2node[key_node]
-        else
-          new_node = aNode
-          forest.key2node[key_node] = new_node
-          # puts "FOREST ADD #{key_node}"
-        end
+          if forest.include?(key_node)
+            new_node = forest.key2node[key_node]
+          else
+            new_node = aNode
+            forest.key2node[key_node] = new_node
+            # puts "FOREST ADD #{key_node}"
+          end
         add_subnode(new_node, false)
         return new_node

data/lib/rley/sppf/parse_forest.rb CHANGED

@@ -1,5 +1,6 @@
 require_relative 'token_node'
 require_relative 'non_terminal_node'
+require_relative 'alternative_node'
 module Rley # This module is used as a namespace
   module SPPF # This module is used as a namespace
@@ -18,17 +19,28 @@ module Rley # This module is used as a namespace
       # A Hash with pairs of the kind node key => node
       attr_reader(:key2node)
+      # A setter that tells that the parse is ambiguous.
+      attr_writer(:is_ambiguous)
       # @param theRootNode [ParseForestNode] The root node of the parse tree.
       def initialize(theRootNode)
         @root = theRootNode
         @key2node = {}
+        @is_ambiguous = false
       end
       # Returns true if the given node is present in the forest.
       def include?(aNode)
         return key2node.include?(aNode)
       end
+      # Returns true if the parse encountered a structural ambiguity
+      # (i.e. more than one parse tree for the given input)
+      def ambiguous?()
+        return @is_ambiguous
+      end
       # Part of the 'visitee' role in the Visitor design pattern.

data/lib/rley/syntax/grammar_builder.rb CHANGED

@@ -18,9 +18,12 @@ module Rley # This module is used as a namespace
       # The list of production rules for the grammar to build
       attr_reader(:productions)
-      def initialize()
+      def initialize(&aBlock)
         @symbols = {}
         @productions = []
+        instance_exec(&aBlock) if block_given?
       end
       # Retrieve a grammar symbol from its name.
@@ -87,6 +90,8 @@ module Rley # This module is used as a namespace
         return @grammar
       end
+      alias rule add_production
       private

data/spec/rley/parser/parse_forest_factory_spec.rb CHANGED

@@ -17,7 +17,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       let(:sample_grammar) do
           # Grammar based on paper from Elisabeth Scott
-          # "SPPF=Style Parsing From Earley Recognizers" in
+          # "SPPF-Style Parsing From Earley Recognizers" in
           # Notes in Theoretical Computer Science 203, (2008), pp. 53-67
           # contains a hidden left recursion and a cycle
           builder = Syntax::GrammarBuilder.new

data/spec/rley/syntax/grammar_builder_spec.rb CHANGED

@@ -6,10 +6,10 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
 module Rley # Open this namespace to avoid module qualifier prefixes
   module Syntax # Open this namespace to avoid module qualifier prefixes
     describe GrammarBuilder do
-      context 'Initialization:' do
-        it 'should be created without argument' do
+      context 'Initialization without argument:' do
+        it 'could be created without argument' do
           expect { GrammarBuilder.new }.not_to raise_error
-        end
+        end
         it 'should have no grammar symbols at start' do
             expect(subject.symbols).to be_empty
@@ -19,6 +19,24 @@ module Rley # Open this namespace to avoid module qualifier prefixes
             expect(subject.productions).to be_empty
         end
       end # context
+      context 'Initialization with argument:' do
+        it 'could be created with a block argument' do
+          expect do GrammarBuilder.new { nil }
+          end.not_to raise_error
+        end
+        it 'could have grammar symbols from block argument' do
+          instance = GrammarBuilder.new do
+            add_terminals('a', 'b', 'c')
+          end
+          expect(instance.symbols.size).to eq(3)
+        end
+        it 'should have no productions at start' do
+            expect(subject.productions).to be_empty
+        end
+      end # context
       context 'Adding symbols:' do
         it 'should build terminals from their names' do
@@ -79,7 +97,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           expect_rhs = [ subject['a'], subject['A'], subject['c'] ]
           expect(new_prod.rhs.members).to eq(expect_rhs)
-          subject.add_production('A' => ['b'])
+          # GrammarBuilder#rule is an alias of add_production
+          subject.rule('A' => ['b'])
           expect(subject.productions.size).to eq(3)
           new_prod = subject.productions.last
           expect(new_prod.lhs).to eq(subject['A'])
@@ -89,11 +108,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       context 'Building grammar:' do
         subject do
-          instance = GrammarBuilder.new
-          instance.add_terminals('a', 'b', 'c')
-          instance.add_production('S' => ['A'])
-          instance.add_production('A' => %w(a A c))
-          instance.add_production('A' => ['b'])
+          instance = GrammarBuilder.new do
+            add_terminals('a', 'b', 'c')
+            add_production('S' => ['A'])
+            add_production('A' => %w(a A c))
+            add_production('A' => ['b'])
+          end
           instance
         end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.3.09
+  version: 0.3.10
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-11-27 00:00:00.000000000 Z
+date: 2016-12-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake