RubyGems - rley - Versions diffs - 0.0.05 → 0.0.06 - Mend

rley 0.0.05 → 0.0.06

Files changed (7) hide show

checksums.yaml +8 -8
data/CHANGELOG.md +7 -0
data/README.md +17 -2
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/earley_parser.rb +3 -3
data/spec/rley/parser/earley_parser_spec.rb +33 -5
metadata +1 -1

checksums.yaml CHANGED

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    ZTEwY2E4YWQ0MDgwMmE0MTFjMjBmOGQ0OWUwYjI5ZjZjMWU2MDNjMw==
+    NzBkOTQ2MmQ2MDcxZGFjZTM5YzNlZGY5NDg1MTFhOWVjYzFjNWFkYg==
   data.tar.gz: !binary |-
-    MjkzZGJjMjM3MmIyZDY0ZjRmZjU2ZTEzNzM2NDRmNzE5MGNhZTU3ZA==
+    ZjEwNzMyNWRjMmUxMGU1YjJmYzQxODg5NjA4M2YzNWU4NmVhNzcyYw==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    MzRmMTAxMzcyZmJkZjNiYTYxNGYxOTQ1M2YzNDRmOTcyZGEzODVjYWZhOGE2
-    MTQxYjQ2YWMxZGMzN2E3NjRjZjhjNmI2MzljOGIwMzcxNGY1ZDg2MGM0YTA5
-    YzZmZjBiODMwMzk0YmY4OTQ4NjRhNGNjMWJjZTRlNjU5Y2NiNjY=
+    MWNhOTQyOGQ3NzcwMzcxY2FlYTgzMWJhMGNlYmQzYzM2ZTAwNjdlZjJkOGE1
+    ODZiODkxMGFlNzc5NjUxNzA3OTVjNzFkNzY1NGQ2MTc4MDRjMTg0YWRhODc5
+    ZTU3NDhjZmY3ZDE2YjM3NmRjZjA0MTVmNjNmZGQxYmE4YTg5YmM=
   data.tar.gz: !binary |-
-    ZDAzM2I1ZDNjMWRjNWEyMGNhMWYwNmUzNTdiNzE3NDYzMzVlMzdiYjFiYzUx
-    Y2I0MGY2MjU2MDg5ZDIxOGI5NjViOWExMDhkZGIyZTc0NmQ4ODVlOTJlYWQx
-    ODU0NGRhZDZmNDA1MTdmMzE1Y2Q3OTY3MmMxZDM4Y2NlYmI3YWQ=
+    MzQ2MmRkMjIyMWQxZGI1MTBmOTdhMDY1ZTYxYjc0MGExZGJjZjFkMjZlOTM3
+    MTk0NjdhMjUwNjAwYWQzMzBlOTdhNzg5ZmFlNWFjNjlhOWM3ZjMxMWJlOTFj
+    NjVlODk3NjBkYTQ3OTFjODBlMjQ2ZjZjNTA5ZGIxZjdiYWIxYzA=

data/CHANGELOG.md CHANGED

@@ -1,3 +1,10 @@
+### 0.0.06 / 2014-11-13
+* [CHANGE] File `README.md`: Added roadmap section.
+* [FIX] `EarleyParser#parse`: prevent call to `scanning` method after last token encountered.
+### 0.0.05 / 2014-11-13
+* [CHANGE] Code re-styling to please Rubocop 0.27.0 (less than 10 offenses).
 ### 0.0.04 / 2014-11-12
 * [CHANGE] Class `DottedItem` moved to `Rley` module.

data/README.md CHANGED

@@ -7,12 +7,27 @@ Rley
 [![Gem Version](https://badge.fury.io/rb/rley.svg)](http://badge.fury.io/rb/rley)
 ### What is Rley? ###
-__Rley__ is a Ruby implementation of a Earley parser.
+__Rley__ is a Ruby implementation of an Earley parser.
 The objective is to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
-This project is in "early" stage.
+Yet another parser?
+Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
+The latter are faster because they use faster algorithms at the price of a loss of generality
+in the grammar/language they support.
+Earley's algorithm, being more general, is able to parse input without imposing restrictions on the context-free grammar.
 Consult Wikipedia to learn more about Earley's parsing algorithm.
+This project is in "early" stage.
+#### Roadmap:
+- Add more validation tests and sample grammars
+- Add AST generation (and semantic actions?)
+- Add DSL for grammar specification
+- Add grammar validations
+- Add error reporting
+- Add examples (including small NLP grammar)
+- Add a command-line interface
+- Provide documentation and examples
 Copyright
 ---------

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.0.05'
+  Version = '0.0.06'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/parser/earley_parser.rb CHANGED

@@ -30,8 +30,8 @@ module Rley # This module is used as a namespace
       def parse(aTokenSequence)
         result = Parsing.new(start_dotted_item, aTokenSequence)
-        (0..aTokenSequence.size).each do |i|
+        last_token_index = aTokenSequence.size
+        (0..last_token_index).each do |i|
           result.chart[i].each do |state|
             if state.complete?
               # parse reached end of production
@@ -40,7 +40,7 @@ module Rley # This module is used as a namespace
               next_symbol = state.next_symbol
               if next_symbol.kind_of?(Syntax::NonTerminal)
                 prediction(result, next_symbol, i)
-              else
+              elsif i < last_token_index
                 # Expecting a terminal symbol
                 scanning(result, next_symbol, i)
               end

data/spec/rley/parser/earley_parser_spec.rb CHANGED

@@ -33,7 +33,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       end
 =end
-      # Grammar 1: A very simple language
+      # Grammar 1: A very simple language
+      # (based on example in N. Wirth "Compiler Construction" book, p. 6)
       # S ::= A.
       # A ::= "a" A "c".
       # A ::= "b".
@@ -64,14 +65,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       # Grammar 2: A simple arithmetic expression language
-      # E ::= S.
+      # (based on example in article on Earley's algorithm in Wikipedia)
+      # P ::= S.
       # S ::= S "+" M.
       # S ::= M.
       # M ::= M "*" M.
       # M ::= T.
       # T ::= an integer number token.
       # Let's create the grammar piece by piece
-      let(:nt_E) { Syntax::NonTerminal.new('E') }
+      let(:nt_P) { Syntax::NonTerminal.new('P') }
       let(:nt_M) { Syntax::NonTerminal.new('M') }
       let(:nt_T) { Syntax::NonTerminal.new('T') }
       let(:plus) { Syntax::VerbatimSymbol.new('+') }
@@ -80,14 +82,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
         integer_pattern = /[-+]?[0-9]+/	# Decimal notation
         Syntax::Literal.new('integer', integer_pattern)
       end
-      let(:prod_E) { Syntax::Production.new(nt_E, [nt_S]) }
+      let(:prod_P) { Syntax::Production.new(nt_P, [nt_S]) }
       let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
       let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
       let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
       let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
       let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
       let(:grammar_expr) do
-        all_prods = [prod_E, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
+        all_prods = [prod_P, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
         Syntax::Grammar.new(all_prods)
       end
@@ -242,6 +244,32 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           ]
           compare_state_set(state_set_5, expectations)
         end
+        it 'should parse a valid simple expression' do
+          instance = EarleyParser.new(grammar_expr)
+          parse_result = instance.parse(grm2_tokens)
+          expect(parse_result.success?).to eq(true)
+          ######################
+          # Expectation chart[0]:
+          # (1) P -> . S, 0         # start rule
+          # (2) S -> . S "+" M, 0   # predict from (1)
+          # (3) S -> . M, 0         # predict from (1)
+          # (4) M -> . M "*" M, 0   # predict from (3)
+          # (5) M -> . T, 0         # predict from (3)
+          # (6) T -> . integer, 0   # predict from (5)
+          expectations = [
+            { origin: 0, production: prod_P, dot: 0 },
+            { origin: 0, production: prod_S1, dot: 0 },
+            { origin: 0, production: prod_S2, dot: 0 },
+            { origin: 0, production: prod_M1, dot: 0 },
+            { origin: 0, production: prod_M2, dot: 0 },
+            { origin: 0, production: prod_T, dot: 0 }
+          ]
+          compare_state_set(parse_result.chart[0], expectations)
+        end
         it 'should parse an invalid simple input' do
           # Parse an erroneous input (b is missing)

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.0.05
+  version: 0.0.06
 platform: ruby
 authors:
 - Dimitri Geshef