RubyGems - rley - Versions diffs - 0.2.04 → 0.2.05 - Mend

rley 0.2.04 → 0.2.05

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +8 -8
data/CHANGELOG.md +9 -0
data/README.md +4 -5
data/examples/parsers/parsing_ambig.rb +2 -2
data/examples/parsers/parsing_err_expr.rb +15 -26
data/examples/parsers/{parsing_tricky.rb → tracing_parser.rb} +2 -1
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/earley_parser.rb +19 -21
data/lib/rley/parser/state_set.rb +11 -0
data/spec/rley/parser/earley_parser_spec.rb +8 -1
data/spec/rley/parser/parsing_spec.rb +1 -2
metadata +3 -3

checksums.yaml CHANGED

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    YmIzNmI1ZmM0N2QyOWM5NmQyYjlmOWRlNzllZmZjMmMxZmNmNmQ4Yg==
+    MzRiMzQ3MzkwZGMzMDJjZDVlYjVjNGI0YzdmMzE3NDFkOTRkOWM3ZQ==
   data.tar.gz: !binary |-
-    MmJjMDU3ZTMwYTA2NzY1YzJjOWQ3ZDk1MGZjYmFmMGMyMjgzOWZhYQ==
+    MDJjMDNjNmIxMWVmMDhkZmFjM2U1ZGQ4ZmFkM2ZjYmZjY2IzNzk0Yw==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    OTgxZTIxZWZhMWRlZTU1ZmVmZDhlYjllOTk2YjYwOTE5NDZjMDgzNzVlMmE3
-    YTIyYzNlNDU3MWE2OTZjM2I4MzAxNzhmMDFjNWU5YmI2N2QyNzQ2NTcxYjg1
-    ZjZkOTU2MWU4ZjM0NWUyMWM5ZDdiNDE1NzM2YTk0NDdlOThhMmE=
+    MmUwZGIwYWNkNzJhOWY4M2Y1YzE1MjljN2JmODg3ZWVlODJhYjI0NzRmMzky
+    NWQ3NzUyM2JhODU5M2I3MzYyY2IyMWMxZTA3ZDQxMTU0ODdmZmY5OTg5YmNi
+    MWU3OTViYzY3Y2E4NDgyMjhiMmUzNDk2NjY0MTdiYWUwYmFkYTE=
   data.tar.gz: !binary |-
-    NDRhZmM5Mzc2ZmFhOWI1OTBiNzMwODA0OTE5NTk4ZjUzYzQ0ZjgyOTAzYTg2
-    YTE0YjFjZDRjM2M0NDYwZDk4Nzg3NGM0OTM4NWRjMzk4NTY4Nzg0OTdkNzAx
-    NjgxOTAxMmIyZWFjMzY0Y2M3MTU4NzRhZjA5MzdlMjUzYzdhNmI=
+    ZjAwNmI4MjNmNDEyOWZlMDM4YjA2YzM3MTBhMjc0MTcyZjliYjc3NWU0ZWIy
+    YjM2OTQzYWVkMDlhMGRkNmQ2OWNhYzhkM2IyNjNlNGNlMzEzMDA3MDYzM2Zj
+    ZTNmOTFjZTcyODRiNjAyNjMwNjQ3MzQ4ZDUyOTMyMGI4NTkxYjc=

data/CHANGELOG.md CHANGED

@@ -1,3 +1,12 @@
+### 0.2.05 / 2015-03-19
+* [NEW] Class `EarleyParser` implements a crude error detection mechanism. A syntax error causes an exception to be raised.
+* [CHANGE] Example file `parsing_err_expr.rb`: demo error message.
+### 0.2.04 / 2015-03-04
+* [NEW] Class `ParseTracer` that helps to trace the parse steps (similar to the trace format in NLTK).
+* [CHANGE] Method `EarleyParser#parse` takes a trace level argument.
 ### 0.2.03 / 2015-02-06
 * [FIX] File `.rubocop.yml`: removal of setting for obsolete EmptyLinesAroundBody cop.
 * [CHANGE] Source code re-formatted to please Rubocop 0.29.

data/README.md CHANGED

@@ -9,20 +9,19 @@ Rley
 [![Dependency Status](https://gemnasium.com/famished-tiger/Rley.svg)](https://gemnasium.com/famished-tiger/Rley)
 [![License](https://img.shields.io/badge/license-MIT-brightgreen.svg?style=flat)](https://github.com/famished-tiger/Rley/blob/master/LICENSE.txt)
-__Rley__ is a Ruby implementation of an [Earley parser](http://en.wikipedia.org/wiki/Earley_parser).
+__Rley__ is a Ruby implementation of a parser using the [Earley](http://en.wikipedia.org/wiki/Earley_parser) algorithm.
 The project aims to build a parser convenient for lightweight NLP (Natural Language Processing) purposes.
 Yet another parser?
 Yes and no. Rley doesn't aim to replace other very good programming language parsers for Ruby.
-The latter are faster because they use faster algorithms at the price of a loss of generality
+The latter are faster because they use optimized algorithms at the price of a loss of generality
 in the grammar/language they support.
-The Earley's algorithm being more general is able to parse input without imposing restriction on the context-free grammar.
-Consult Wikipedia to learn more about Earley's parsing algorithm.
+Earley's algorithm, being more general, is able to parse input that conforms to any context-free grammar.
+For instance, it copes with ambiguous grammars.
 This project is in "earley" stage.
 ####Roadmap:
-- Add examples (including small NLP grammar)
 - Document the parser API
 - Add more validation tests and sample grammars
 - Add AST generation (and semantic actions?)

data/examples/parsers/parsing_ambig.rb CHANGED

@@ -69,7 +69,7 @@ pp result
 # Step 6. Generate a parse tree from the parse result
 ptree = result.parse_tree
 pp ptree
-=begin
+#=begin
 ########################################
 # Step 7. Render the parse tree (in JSON)
 # Let's create a parse tree visitor
@@ -81,5 +81,5 @@ renderer = Rley::Formatter::Json.new(STDOUT)
 # Now emit the parse tree as JSON on the console output
 puts "JSON rendering of the parse tree for '#{valid_input}' input:"
 renderer.render(visitor)
-=end
+#=end
 # End of file

data/examples/parsers/parsing_err_expr.rb CHANGED

@@ -1,5 +1,5 @@
-# Purpose: to demonstrate how to handle parsing errors
-# and render a parse tree
+# Purpose: to demonstrate how to catch parsing errors
 require 'pp' # TODO remove this dependency
 require 'rley'  # Load the gem
@@ -8,9 +8,8 @@ require 'rley'  # Load the gem
 # 2. Create a tokenizer for the language
 # 3. Create a parser for that grammar
 # 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
+# 5. Let the parser process the invalid input
 ########################################
 # Step 1. Define a grammar for a very simple arithmetic expression language
@@ -55,31 +54,21 @@ end
 parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
 ########################################
-# Step 3. Tokenize the invalid input
-invalid_input = '2 + 3 * * 4'
+# Step 4. Tokenize the invalid input
+invalid_input = '2 + 3 * * 4' # Notice the repeated stars (*)
+puts "Invalid expression to parse: #{invalid_input}"
+puts ''
 tokens = tokenizer(invalid_input, grammar_s_expr)
 ########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-puts "Parse successful? #{result.success?}"
-pp result
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-pp ptree
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
+# Step 5. Let's catch the exception caused by a syntax error...
+# ... and display the error message
+begin
+  parser.parse(tokens)
+  rescue StandardError => exc
+    puts exc.message
+end
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{invalid_input}' input:"
-renderer.render(visitor)
 # End of file

data/examples/parsers/{parsing_tricky.rb → tracing_parser.rb} RENAMED

@@ -46,7 +46,8 @@ valid_input = 'abcdefg'
 tokens = tokenizer(valid_input, grammar_tricky)
 ########################################
-# Step 5. Let the parser process the input, set trace level to 1
+# Step 5. Let the parser process the input
+# Force the parser to trace its parsing progress.
 result = parser.parse(tokens, 1)
 puts "Parsing success? #{result.success?}"

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.2.04'
+  Version = '0.2.05'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/parser/earley_parser.rb CHANGED

@@ -27,27 +27,6 @@ module Rley # This module is used as a namespace
         @start_mapping = build_start_mapping(dotted_items)
         @next_mapping = build_next_mapping(dotted_items)
       end
-=begin
-    You can optionally specify a tracing level, for how much output you
-    want to see:
-    0: No output.
-    1: Show edges from scanner and completer rules (not predictor).
-    2 (default): Show all edges as they are added to the chart.
-        - For each index I{end} in [0, 1, ..., N]:
-          - For each I{edge} s.t. I{edge}.end = I{end}:
-            - If I{edge} is incomplete, and I{edge}.next is not a part
-              of speech:
-                - Apply PredictorRule to I{edge}
-            - If I{edge} is incomplete, and I{edge}.next is a part of
-              speech:
-                - Apply ScannerRule to I{edge}
-            - If I{edge} is complete:
-                - Apply CompleterRule to I{edge}
-        - Return any complete parses in the chart
-=end
       # Parse a sequence of input tokens.
       # @param aTokenSequence [Array] Array of Tokens objects returned by a
@@ -65,6 +44,7 @@ module Rley # This module is used as a namespace
         result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
         last_token_index = aTokenSequence.size
         (0..last_token_index).each do |i|
+          handle_error(result) if result.chart[i].empty?
           predicted = Set.new
           result.chart[i].each do |state|
             if state.complete?  # End of production reached?
@@ -220,6 +200,24 @@ module Rley # This module is used as a namespace
           next_mapping[item]
         end
       end
+      # Raise an exception to indicate a syntax error.
+      def handle_error(aParsing)
+        # Retrieve the first empty state set
+        pos = aParsing.chart.state_sets.find_index(&:empty?)
+        lexeme_at_pos = aParsing.tokens[pos - 1].lexeme
+        terminals = aParsing.chart.state_sets[pos - 1].expected_terminals
+        err_msg = "Syntax error at or near token #{pos}"
+        err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
+        if terminals.size > 1
+          err_msg << "one of: #{terminals},"
+        else
+           err_msg << ": #{terminals[0]},"
+        end
+        err_msg << " found a #{aParsing.tokens[pos-1].terminal} instead."
+        fail StandardError, err_msg
+      end
     end # class
   end # module
 end # module

data/lib/rley/parser/state_set.rb CHANGED

@@ -59,6 +59,17 @@ module Rley # This module is used as a namespace
         return candidate
       end
+      # The list of distinct expected terminal symbols. An expected symbol is the
+      # one immediately to the right of the dot in a parse state of the state set.
+      def expected_terminals()
+        expecting_terminals = states.select do |s|
+          s.dotted_rule.next_symbol.kind_of?(Rley::Syntax::Terminal)
+        end
+        terminals = expecting_terminals.map { |s| s.dotted_rule.next_symbol }
+        return terminals.uniq
+      end
       private

data/spec/rley/parser/earley_parser_spec.rb CHANGED

@@ -557,7 +557,13 @@ SNIPPET
             Token.new('c', c_),
             Token.new('c', c_)
           ]
-          parse_result = subject.parse(wrong)
+          err_msg = <<-MSG
+Syntax error at or near token 3>>>c<<<:
+Expected one of: ['a', 'b'], found a 'c' instead.
+MSG
+          err = StandardError
+          expect { subject.parse(wrong)}.to raise_error(err, err_msg.chomp)
+=begin
           expect(parse_result.success?).to eq(false)
           ###################### S(0) == . a a c c
@@ -589,6 +595,7 @@ SNIPPET
           ###################### S(3) == a a c? c
           state_set_3 = parse_result.chart[3]
           expect(state_set_3.states).to be_empty  # This is an error symptom
+=end
         end
         it 'should parse a grammar with nullable nonterminals' do

data/spec/rley/parser/parsing_spec.rb CHANGED

@@ -133,7 +133,7 @@ SNIPPET
           expect(new_state.origin).to eq(0)
         end
       end # context
-=begin
       context 'Parse tree building:' do
         let(:sample_grammar1) do
           builder = grammar_abc_builder
@@ -401,7 +401,6 @@ SNIPPET
           expect(actual).to eq(expected_text.chomp)
         end
       end # context
-=end
     end # describe
   end # module
 end # module

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.2.04
+  version: 0.2.05
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-03-04 00:00:00.000000000 Z
+date: 2015-03-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -108,7 +108,7 @@ files:
 - examples/parsers/parsing_groucho.rb
 - examples/parsers/parsing_L0.rb
 - examples/parsers/parsing_L1.rb
-- examples/parsers/parsing_tricky.rb
+- examples/parsers/tracing_parser.rb
 - examples/recognizers/recognizer_abc.rb
 - lib/rley.rb
 - lib/rley/constants.rb