RubyGems - rley - Versions diffs - 0.2.03 → 0.2.04 - Mend

rley 0.2.03 → 0.2.04

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

checksums.yaml +8 -8
data/examples/parsers/parsing_err_expr.rb +85 -0
data/examples/parsers/parsing_groucho.rb +99 -0
data/examples/parsers/parsing_tricky.rb +53 -0
data/lib/rley/constants.rb +1 -1
data/lib/rley/parser/chart.rb +27 -4
data/lib/rley/parser/earley_parser.rb +54 -16
data/lib/rley/parser/parse_state_tracker.rb +1 -0
data/lib/rley/parser/parse_tracer.rb +100 -0
data/lib/rley/parser/parsing.rb +18 -6
data/lib/rley/parser/state_set.rb +9 -1
data/spec/rley/parser/chart_spec.rb +71 -4
data/spec/rley/parser/earley_parser_spec.rb +33 -0
data/spec/rley/parser/parse_tracer_spec.rb +193 -0
data/spec/rley/parser/parsing_spec.rb +28 -11
metadata +9 -3

checksums.yaml CHANGED

@@ -1,15 +1,15 @@
 ---
 !binary "U0hBMQ==":
   metadata.gz: !binary |-
-    ZWM0MGE3NzdlMzhiNTBkYTkzZjBlZjZjYzhhNTA1NzMwMzc4ZjI5NA==
+    YmIzNmI1ZmM0N2QyOWM5NmQyYjlmOWRlNzllZmZjMmMxZmNmNmQ4Yg==
   data.tar.gz: !binary |-
-    M2JjYTE2OTQxNmI3MzNjOGZjMzJlZGNhY2NiYjU2ZjgzZmVmNmE3Mw==
+    MmJjMDU3ZTMwYTA2NzY1YzJjOWQ3ZDk1MGZjYmFmMGMyMjgzOWZhYQ==
 !binary "U0hBNTEy":
   metadata.gz: !binary |-
-    MzA0NjBlNDZmNzMzYjc2MTg5Y2Q4NTUwMWE1MDZjMTM2MmJlM2I3ZmQ3ZTc2
-    ZWEzMjVjMmY5NjRkMzEyZGRmMTU1NTU5YzUyNjkxMGMwZDM4OTUyMGI2ZjQ0
-    NmQzMTIzOWFkNzAzOTY2MTA4MzYyYzU0MWU1MzkwMDQ5YjlkMjM=
+    OTgxZTIxZWZhMWRlZTU1ZmVmZDhlYjllOTk2YjYwOTE5NDZjMDgzNzVlMmE3
+    YTIyYzNlNDU3MWE2OTZjM2I4MzAxNzhmMDFjNWU5YmI2N2QyNzQ2NTcxYjg1
+    ZjZkOTU2MWU4ZjM0NWUyMWM5ZDdiNDE1NzM2YTk0NDdlOThhMmE=
   data.tar.gz: !binary |-
-    NjE0M2Q2YzliYzBkMzhlNzNhM2NhOTRiMDVkYzI1NTFlMTdiOGY0ODNjOWMz
-    YzkxZmYzNDg5ZTVkMzlkNjgzMzc5ZTE5ZTQzYzM0MTgwNDk4ZDQ3NDI4NGJh
-    NTg1NjMyYmVhNGU0MTkyMzk3NGJlMTYwM2QxMmZkNGIzMjMxMjE=
+    NDRhZmM5Mzc2ZmFhOWI1OTBiNzMwODA0OTE5NTk4ZjUzYzQ0ZjgyOTAzYTg2
+    YTE0YjFjZDRjM2M0NDYwZDk4Nzg3NGM0OTM4NWRjMzk4NTY4Nzg0OTdkNzAx
+    NjgxOTAxMmIyZWFjMzY0Y2M3MTU4NzRhZjA5MzdlMjUzYzdhNmI=

data/examples/parsers/parsing_err_expr.rb ADDED

@@ -0,0 +1,85 @@
+# Purpose: to demonstrate how to handle parsing errors
+# and render a parse tree
+require 'pp' # TODO remove this dependency
+require 'rley'  # Load the gem
+# Steps to render a parse tree (of a valid parsed input):
+# 1. Define a grammar
+# 2. Create a tokenizer for the language
+# 3. Create a parser for that grammar
+# 4. Tokenize the input
+# 5. Let the parser process the input
+# 6. Generate a parse tree from the parse result
+# 7. Render the parse tree (in JSON)
+########################################
+# Step 1. Define a grammar for a very simple arithmetic expression language
+# (based on example in article on Earley's algorithm in Wikipedia)
+# Let's create the grammar piece by piece
+builder = Rley::Syntax::GrammarBuilder.new
+builder.add_terminals('+', '*', 'integer')
+builder.add_production('P' => 'S')
+builder.add_production('S' => %w(S + M))
+builder.add_production('S' => 'M')
+builder.add_production('M' => %w(M * T))
+builder.add_production('M' => 'T')
+builder.add_production('T' => 'integer')
+# And now build the grammar...
+grammar_s_expr = builder.grammar
+########################################
+# 2. Create a tokenizer for the language
+# The tokenizer transforms the input into an array of tokens
+def tokenizer(aText, aGrammar)
+  tokens = aText.scan(/\S+/).map do |lexeme|
+    case lexeme
+      when '+', '*'
+        terminal = aGrammar.name2symbol[lexeme]
+      when /^[-+]?\d+$/
+        terminal = aGrammar.name2symbol['integer']
+      else
+        msg = "Unknown input text '#{lexeme}'"
+        fail StandardError, msg
+    end
+    Rley::Parser::Token.new(lexeme, terminal)
+  end
+  return tokens
+end
+########################################
+# Step 3. Create a parser for that grammar
+parser = Rley::Parser::EarleyParser.new(grammar_s_expr)
+########################################
+# Step 4. Tokenize the invalid input
+invalid_input = '2 + 3 * * 4'
+tokens = tokenizer(invalid_input, grammar_s_expr)
+########################################
+# Step 5. Let the parser process the input
+result = parser.parse(tokens)
+puts "Parse successful? #{result.success?}"
+pp result
+########################################
+# Step 6. Generate a parse tree from the parse result
+ptree = result.parse_tree
+pp ptree
+########################################
+# Step 7. Render the parse tree (in JSON)
+# Let's create a parse tree visitor
+visitor = Rley::ParseTreeVisitor.new(ptree)
+#Here we create a renderer object...
+renderer = Rley::Formatter::Json.new(STDOUT)
+# Now emit the parse tree as JSON on the console output
+puts "JSON rendering of the parse tree for '#{invalid_input}' input:"
+renderer.render(visitor)
+# End of file

data/examples/parsers/parsing_groucho.rb ADDED

@@ -0,0 +1,99 @@
+# Purpose: to demonstrate how to parse an emblematic ambiguous sentence
+# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
+require 'pp'
+require 'rley'  # Load the gem
+# Steps to render a parse tree (of a valid parsed input):
+# 1. Define a grammar
+# 2. Create a tokenizer for the language
+# 3. Create a parser for that grammar
+# 4. Tokenize the input
+# 5. Let the parser process the input
+# 6. Generate a parse tree from the parse result
+# 7. Render the parse tree (in JSON)
+########################################
+# Step 1. Define a grammar for a micro English-like language
+# based on Jurafsky & Martin L0 language (chapter 12 of the book).
+# It defines the syntax of a sentence in a language with a
+# very limited syntax and lexicon in the context of airline reservation.
+builder = Rley::Syntax::GrammarBuilder.new
+builder.add_terminals('N', 'V', 'Pro')  # N(oun), V(erb), Pro(noun)
+builder.add_terminals('Det', 'P')       # Det(erminer), P(reposition)
+builder.add_production('S' => %w[NP VP])
+builder.add_production('NP' => %w[Det N])
+builder.add_production('NP' => %w[Det N PP])
+builder.add_production('NP' => 'Pro')
+builder.add_production('VP' => %w[V NP])
+builder.add_production('VP' => %w[VP PP])
+builder.add_production('PP' => %w[P NP])
+# And now build the grammar...
+groucho_grammar = builder.grammar
+########################################
+# 2. Create a tokenizer for the language
+# The tokenizer transforms the input into an array of tokens
+# This is a very simplistic implementation for demo purposes.
+# The lexicon is just a Hash with pairs of the form:
+# word => terminal symbol name
+Groucho_lexicon = {
+  'elephant' => 'N',
+  'pajamas' => 'N',
+  'shot' => 'V',
+  'I' => 'Pro',
+  'an' => 'Det',
+  'my' => 'Det',
+  'in' => 'P',
+}
+# Highly simplified tokenizer implementation.
+def tokenizer(aText, aGrammar)
+  tokens = aText.scan(/\S+/).map do |word|
+    term_name = Groucho_lexicon[word]
+    if term_name.nil?
+      fail StandardError, "Word '#{word}' not found in lexicon"
+    end
+    terminal = aGrammar.name2symbol[term_name]
+    Rley::Parser::Token.new(word, terminal)
+  end
+  return tokens
+end
+########################################
+# Step 3. Create a parser for that grammar
+parser = Rley::Parser::EarleyParser.new(groucho_grammar)
+########################################
+# Step 4. Tokenize the input
+valid_input = 'I shot an elephant in my pajamas'
+tokens = tokenizer(valid_input, groucho_grammar)
+########################################
+# Step 5. Let the parser process the input
+result = parser.parse(tokens)
+puts "Parsing success? #{result.success?}"
+#=begin
+########################################
+# Step 6. Generate a parse tree from the parse result
+ptree = result.parse_tree
+########################################
+# Step 7. Render the parse tree (in JSON)
+# Let's create a parse tree visitor
+visitor = Rley::ParseTreeVisitor.new(ptree)
+#Here we create a renderer object...
+renderer = Rley::Formatter::Json.new(STDOUT)
+# Now emit the parse tree as JSON on the console output
+puts "JSON rendering of the parse tree for '#{valid_input}' input:"
+renderer.render(visitor)
+#=end
+# End of file

data/examples/parsers/parsing_tricky.rb ADDED

@@ -0,0 +1,53 @@
+# Purpose: to use a grammar that causes some Earley parsers to fail.
+# See: http://stackoverflow.com/questions/22311323/earley-parser-recursion
+require 'rley'  # Load the gem
+# Steps to parse some valid input:
+# 1. Define a grammar
+# 2. Create a tokenizer for the language
+# 3. Create a parser for that grammar
+# 4. Tokenize the input
+# 5. Let the parser process the input & trace its progress
+########################################
+# Step 1. Define a grammar that might cause infinite recursion
+# Let's create the grammar step-by-step with the grammar builder:
+builder = Rley::Syntax::GrammarBuilder.new
+builder.add_terminals('ident')
+builder.add_production('S' => 'E')
+builder.add_production('E' => ['E', 'E'] )
+builder.add_production('E' => 'ident')
+# And now build the grammar...
+grammar_tricky = builder.grammar
+########################################
+# 2. Create a tokenizer for the language
+# The tokenizer transforms the input into an array of tokens
+def tokenizer(aText, aGrammar)
+  terminal = aGrammar.name2symbol['ident']
+  tokens = aText.chars.map do |ch|
+    Rley::Parser::Token.new(ch, terminal)
+  end
+  return tokens
+end
+########################################
+# Step 3. Create a parser for that grammar
+parser = Rley::Parser::EarleyParser.new(grammar_tricky)
+########################################
+# Step 4. Tokenize the input
+valid_input = 'abcdefg'
+tokens = tokenizer(valid_input, grammar_tricky)
+########################################
+# Step 5. Let the parser process the input, set trace level to 1
+result = parser.parse(tokens, 1)
+puts "Parsing success? #{result.success?}"
+# End of file

data/lib/rley/constants.rb CHANGED

@@ -3,7 +3,7 @@
 module Rley # Module used as a namespace
   # The version number of the gem.
-  Version = '0.2.03'
+  Version = '0.2.04'
   # Brief description of the gem.
   Description = "Ruby implementation of the Earley's parsing algorithm"

data/lib/rley/parser/chart.rb CHANGED

@@ -7,10 +7,19 @@ module Rley # This module is used as a namespace
     # A one-dimensional array with n + 1 entries (n = number of input tokens).
     class Chart
       attr_reader(:state_sets)
+      # The tracer object used to report the parse progress.
+      # Its trace level takes one of the following values:
+      # 0: No trace output (default case)
+      # 1: Show trace of scanning and completion rules
+      # 2: Same as 1 with the addition of the prediction rules
+      attr_reader(:tracer)
-      def initialize(startDottedItem, tokenCount)
+      # @param aTracer [ParseTracer] A tracer object.
+      def initialize(startDottedItem, tokenCount, aTracer)
+        @tracer = aTracer
         @state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
-        push_state(startDottedItem, 0, 0)
+        push_state(startDottedItem, 0, 0, :start_rule)
       end
       # The dotted item/rule used to seed the parse chart.
@@ -38,9 +47,23 @@ module Rley # This module is used as a namespace
       end
       # Push a parse state for the chart entry with given index
-      def push_state(aDottedItem, anOrigin, anIndex)
+      def push_state(aDottedItem, anOrigin, anIndex, aReason)
         new_state = ParseState.new(aDottedItem, anOrigin)
-        self[anIndex].push_state(new_state)
+        pushed = self[anIndex].push_state(new_state)
+        if pushed && tracer.level > 0
+          case aReason
+            when :start_rule, :prediction
+              tracer.trace_prediction(anIndex, new_state)
+            when :scanning
+               tracer.trace_scanning(anIndex, new_state)
+            when :completion
+               tracer.trace_completion(anIndex, new_state)
+            else
+              raise NotImplementedError, "Unknown push_state mode #{aReason}"
+          end
+        end
       end
     end # class
   end # module

data/lib/rley/parser/earley_parser.rb CHANGED

@@ -1,4 +1,5 @@
 require_relative '../syntax/grammar'
+require_relative 'parse_tracer'
 require_relative 'dotted_item'
 require_relative 'parsing'
@@ -19,9 +20,7 @@ module Rley # This module is used as a namespace
       # In other words, the 'next_mapping' allows to find the dotted item
       # after "advancing" the dot
       attr_reader(:next_mapping)
-      # @param aGrammar [Grammar] The grammar of the language
-      # (to use by the parser).
       def initialize(aGrammar)
         @grammar = aGrammar
         @dotted_items = build_dotted_items(grammar)
@@ -29,29 +28,57 @@ module Rley # This module is used as a namespace
         @next_mapping = build_next_mapping(dotted_items)
       end
+=begin
+    You can optionally specify a tracing level, for how much output you
+    want to see:
+    0 (default): No output.
+    1: Show edges from scanner and completer rules (not predictor).
+    2: Show all edges as they are added to the chart.
+        - For each index I{end} in [0, 1, ..., N]:
+          - For each I{edge} s.t. I{edge}.end = I{end}:
+            - If I{edge} is incomplete, and I{edge}.next is not a part
+              of speech:
+                - Apply PredictorRule to I{edge}
+            - If I{edge} is incomplete, and I{edge}.next is a part of
+              speech:
+                - Apply ScannerRule to I{edge}
+            - If I{edge} is complete:
+                - Apply CompleterRule to I{edge}
+        - Return any complete parses in the chart
+=end
       # Parse a sequence of input tokens.
       # @param aTokenSequence [Array] Array of Tokens objects returned by a
       # tokenizer/scanner/lexer.
+      # @param aGrammar [Grammar] The grammar of the language
+      # (to use by the parser).
+      # @param aTraceLevel [Fixnum] The specified trace level.
+      # The possible values are:
+      # 0: No trace output (default case)
+      # 1: Show trace of scanning and completion rules
+      # 2: Same as 1 with the addition of the prediction rules
       # @return [Parsing] an object that embeds the parse results.
-      def parse(aTokenSequence)
-        result = Parsing.new(start_dotted_item, aTokenSequence)
+      def parse(aTokenSequence, aTraceLevel = 0)
+        tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
+        result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
         last_token_index = aTokenSequence.size
         (0..last_token_index).each do |i|
           predicted = Set.new
           result.chart[i].each do |state|
-            if state.complete?
-              # parse reached end of production
-              completion(result, state, i)
+            if state.complete?  # End of production reached?
+              completion(result, state, i, tracer)
             else
               next_symbol = state.next_symbol
               if next_symbol.kind_of?(Syntax::NonTerminal)
                 unless predicted.include? next_symbol
-                  prediction(result, state, next_symbol, i)
+                  prediction(result, state, next_symbol, i, tracer)
                   predicted << next_symbol  # Avoid repeated predictions
                 end
               elsif i < last_token_index
                 # Expecting a terminal symbol
-                scanning(result, next_symbol, i)
+                scanning(result, next_symbol, i, tracer)
               end
             end
           end
@@ -134,18 +161,21 @@ module Rley # This module is used as a namespace
       #   immediately follows a dot
       #   (= is expected/predicted by the production rule)
       # @param aPosition [Fixnum] position in the input token sequence.
-      def prediction(aParsing, aState, aNonTerminal, aPosition)
+      def prediction(aParsing, aState, aNonTerminal, aPosition, aTracer)
+        if aTracer.level > 1
+          puts "Chart[#{aPosition}] Prediction(s) from #{aState}:"
+        end
         # Retrieve all start dotted items for productions
         # with aNonTerminal as its lhs
         items = start_mapping[aNonTerminal]
         items.each do |an_item|
-          aParsing.push_state(an_item, aPosition, aPosition)
+          aParsing.push_state(an_item, aPosition, aPosition, :prediction)
         end
         return unless aNonTerminal.nullable?
         # Ayock-Horspool trick for nullable rules
         next_item = next_mapping[aState.dotted_rule]
-        aParsing.push_state(next_item, aState.origin, aPosition)
+        aParsing.push_state(next_item, aState.origin, aPosition, :prediction)
       end
       # This method is called when a parse state for chart entry at position
@@ -162,7 +192,12 @@ module Rley # This module is used as a namespace
       # @param aTerminal [Terminal] a terminal symbol that
       #   immediately follows a dot
       # @param aPosition [Fixnum] position in the input token sequence.
-      def scanning(aParsing, aTerminal, aPosition)
+      def scanning(aParsing, aTerminal, aPosition, aTracer)
+        if aTracer.level > 1
+          prefix = "Chart[#{aPosition}] Scanning of terminal "
+          suffix = "#{aTerminal.name}:"
+          puts prefix + suffix
+        end
         aParsing.scanning(aTerminal, aPosition) do |item|
           next_mapping[item]
         end
@@ -173,11 +208,14 @@ module Rley # This module is used as a namespace
       # For every state in chart[aPosition] that is
       # complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
       # Find states s in chart[j] of the
-      #  form {dotted_rule: Y -> α • X β, origin: i}
+      #  form { dotted_rule: Y -> α • X β, origin: i}
       #   In other words, rules that predicted the non-terminal X.
       # For each s, add to chart[aPosition] a state of the form
       #   { dotted_rule: Y → α X • β, origin: i})
-      def completion(aParsing, aState, aPosition)
+      def completion(aParsing, aState, aPosition, aTracer)
+        if aTracer.level > 1
+          puts "Chart[#{aPosition}] Completion of state #{aState}:"
+        end
         aParsing.completion(aState, aPosition) do |item|
           next_mapping[item]
         end

data/lib/rley/parser/parse_state_tracker.rb CHANGED

@@ -25,6 +25,7 @@ module Rley # This module is used as a namespace
       # Write accessor. Set the given parse state as the current one.
       def parse_state=(aParseState)
+        fail StandardError, "Nil parse state" if aParseState.nil?
         @parse_state = aParseState
         processed_states[parse_state] = true
       end

data/lib/rley/parser/parse_tracer.rb ADDED

@@ -0,0 +1,100 @@
+require 'ostruct'
+module Rley # This module is used as a namespace
+  module Parser # This module is used as a namespace
+    # Utility class used to trace the parsing of a token sequence.
+    class ParseTracer
+      # The stream where the trace output is sent
+      attr_reader(:ostream)
+      # The trace level
+      attr_reader(:level)
+      attr_reader(:lexemes)
+      attr_reader(:col_width)
+      def initialize(aTraceLevel, anIO, aTokenSequence)
+        @level = aTraceLevel <= 0 ? 0 : [aTraceLevel, 2].min
+        @ostream = anIO
+        @lexemes = aTokenSequence.map(&:lexeme)
+        emit_tokens
+        emit_heading
+      end
+      # Emit the trace text to the output IO
+      # if the given trace level is equal or greater to the
+      # trace level of the tracer instance.
+      def print_if(aLevel, text)
+        ostream.print(text) if level >= aLevel
+      end
+      # Emit the trace of a scanning step.
+      def trace_scanning(aStatesetIndex, aParseState)
+        return unless level
+        scan_picture = '[' + '-' * (col_width-1) + ']'
+        org = OpenStruct.new(origin: aStatesetIndex - 1,
+          dotted_rule: aParseState.dotted_rule)
+        trace_diagram(aStatesetIndex, org, scan_picture)
+      end
+      def trace_prediction(aStatesetIndex, aParseState)
+        return unless level
+        trace_diagram(aStatesetIndex, aParseState, '>')
+      end
+      def trace_completion(aStatesetIndex, aParseState)
+        return unless level
+        if aStatesetIndex == lexemes.size && aParseState.origin == 0 &&  aParseState.complete?
+          picture = '=' * (col_width * lexemes.size - 1)
+        else
+          count = col_width * (aStatesetIndex - aParseState.origin) - 1
+          picture = '-' * count
+        end
+        completion_picture = '[' + picture + (aParseState.complete? ? ']' : '>')
+        trace_diagram(aStatesetIndex, aParseState, completion_picture)
+      end
+      private
+      def emit_tokens()
+        literals = lexemes.map { |lx| "'#{lx}'" }
+        print_if 1, '[' + literals.join(', ') + "]\n"
+      end
+      def emit_heading()
+        longest = lexemes.map(&:length).max
+        @col_width = longest + 3
+        headers = lexemes.map { |l| "#{l.center(col_width-1, ' ')}" }
+        print_if 1, '|.' + headers.join('.') + ".|\n"
+      end
+      def padding(aStatesetIndex, aParseState, aPicture)
+        l_pad_pattern = '.' + ' ' * (col_width-1)
+        left_padding =  l_pad_pattern * ([0, aParseState.origin].max)
+        r_pad_pattern = ' ' * (col_width-1) + '.'
+        right_padding = r_pad_pattern * (lexemes.size - aStatesetIndex)
+        return left_padding + aPicture + right_padding
+      end
+      def parse_state_str(aStatesetIndex, aParseState)
+        "[#{aParseState.origin}:#{aStatesetIndex}] #{aParseState.dotted_rule}"
+      end
+      def trace_diagram(aStatesetIndex, aParseState, aPicture)
+        diagram = padding(aStatesetIndex, aParseState, aPicture)
+        prefix = '|'
+        suffix = '| ' + parse_state_str(aStatesetIndex, aParseState)
+        trace = prefix + diagram + suffix
+        print_if 1, trace + "\n"
+      end
+    end # class
+  end # module
+end # module
+# End of file

data/lib/rley/parser/parsing.rb CHANGED

@@ -11,9 +11,14 @@ module Rley # This module is used as a namespace
       # The sequence of input token to parse
       attr_reader(:tokens)
-      def initialize(startDottedRule, theTokens)
+      # @param aTracer [ParseTracer] A tracer object; its trace level
+      # takes one of the following values:
+      # 0: No trace output (default case)
+      # 1: Show trace of scanning and completion rules
+      # 2: Same as 1 with the addition of the prediction rules
+      def initialize(startDottedRule, theTokens, aTracer)
         @tokens = theTokens.dup
-        @chart = Chart.new(startDottedRule, tokens.size)
+        @chart = Chart.new(startDottedRule, tokens.size, aTracer)
       end
       # Return true if the parse was successful (= input tokens
@@ -36,6 +41,13 @@ module Rley # This module is used as a namespace
         builder = tree_builder(state_tracker.state_set_index)
         loop do
+          match_symbol = state_tracker.symbol_on_left
+          # puts '--------------------'
+          # puts "Active parse state: #{state_tracker.parse_state}"
+          # puts "Matching symbol: #{match_symbol}"
+          # puts 'Parse tree:'
+          # puts builder.root.to_string(0)
           # Place the symbol on left of the dot in the parse tree
           done = insert_matched_symbol(state_tracker, builder)
           break if done
@@ -47,9 +59,9 @@ module Rley # This module is used as a namespace
       # Push a parse state (dotted item + origin) to the
       # chart entry with given index if it isn't yet in the chart entry.
-      def push_state(aDottedItem, anOrigin, aChartIndex)
+      def push_state(aDottedItem, anOrigin, aChartIndex, aReason)
         fail StandardError, 'Dotted item may not be nil' if aDottedItem.nil?
-        chart.push_state(aDottedItem, anOrigin, aChartIndex)
+        chart.push_state(aDottedItem, anOrigin, aChartIndex, aReason)
       end
@@ -74,7 +86,7 @@ module Rley # This module is used as a namespace
         states = states_expecting(aTerminal, aPosition, false)
         states.each do |s|
           next_item = nextMapping.call(s.dotted_rule)
-          push_state(next_item, s.origin, aPosition + 1)
+          push_state(next_item, s.origin, aPosition + 1, :scanning)
         end
       end
@@ -95,7 +107,7 @@ module Rley # This module is used as a namespace
         states = states_expecting(curr_lhs, curr_origin, false)
         states.each do |s|
           next_item = nextMapping.call(s.dotted_rule)
-          push_state(next_item, s.origin, aPosition)
+          push_state(next_item, s.origin, aPosition, :completion)
         end
       end

data/lib/rley/parser/state_set.rb CHANGED

@@ -17,8 +17,16 @@ module Rley # This module is used as a namespace
       # Append the given state (if it isn't yet in the set)
       # to the list of states
       # @param aState [ParseState] the state to push.
+      # @return [TrueClass/FalseClass] true when the state is really added
       def push_state(aState)
-        @states << aState unless include?(aState)
+        if include?(aState)
+          result = false
+        else
+          @states << aState
+          result = true
+        end
+        return result
       end
       # The list of ParseState that expect the given symbol.

data/spec/rley/parser/chart_spec.rb CHANGED

@@ -1,5 +1,13 @@
 require_relative '../../spec_helper'
+require 'stringio'
+require_relative '../../../lib/rley/syntax/terminal'
+require_relative '../../../lib/rley/syntax/non_terminal'
+require_relative '../../../lib/rley/syntax/production'
+require_relative '../../../lib/rley/parser/token'
+require_relative '../../../lib/rley/parser/dotted_item'
+require_relative '../../../lib/rley/parser/parse_state'
+require_relative '../../../lib/rley/parser/parse_tracer'
 # Load the class under test
 require_relative '../../../lib/rley/parser/chart'
@@ -10,12 +18,21 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       let(:count_token) { 20 }
       let(:dotted_rule) { double('fake-dotted-item') }
+      let(:output) { StringIO.new('', 'w') }
+      let(:token_seq) do
+        literals = ['I', 'saw', 'John', 'with', 'a', 'dog']
+        literals.map {|lexeme| Token.new(lexeme, nil)}
+      end
+      let(:sample_tracer) { ParseTracer.new(0, output, token_seq) }
       context 'Initialization:' do
         # Default instantiation rule
-        subject { Chart.new(dotted_rule, count_token) }
+        subject { Chart.new(dotted_rule, count_token, sample_tracer) }
-        it 'should be created with a start dotted rule and a token count' do
-          expect { Chart.new(dotted_rule, count_token) }.not_to raise_error
+        it 'should be created with start dotted rule, token count, tracer' do
+          expect { Chart.new(dotted_rule, count_token, sample_tracer) }.not_to raise_error
         end
         it 'should have a seed state in first state_set' do
@@ -33,10 +50,60 @@ module Rley # Open this namespace to avoid module qualifier prefixes
         it 'should know the start dotted rule' do
           expect(subject.start_dotted_rule).to eq(dotted_rule)
         end
         it 'should have at least one non-empty state set' do
           expect(subject.last_index).to eq(0)
         end
+        it 'should reference a tracer' do
+          expect(subject.tracer).to eq(sample_tracer)
+        end
+      end # context
+      context 'Provided services:' do
+        let(:t_a) { Syntax::Terminal.new('A') }
+        let(:t_b) { Syntax::Terminal.new('B') }
+        let(:t_c) { Syntax::Terminal.new('C') }
+        let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
+        let(:sample_prod) do
+          Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
+        end
+        let(:origin_val) { 3 }
+        let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
+        let(:complete_rule) { DottedItem.new(sample_prod, 3) }
+        let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
+        let(:sample_tracer) { ParseTracer.new(1, output, token_seq) }
+        # Factory method.
+        def parse_state(origin, aDottedRule)
+          ParseState.new(aDottedRule, origin)
+        end
+        subject { Chart.new(dotted_rule, count_token, sample_tracer) }
+        it 'should trace its initialization' do
+          subject[0]  # Force constructor call here
+          expectation = <<-SNIPPET
+['I', 'saw', 'John', 'with', 'a', 'dog']
+|.  I   . saw  . John . with .  a   . dog  .|
+|>      .      .      .      .      .      .| [0:0] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectation)
+        end
+        it 'should trace parse state pushing' do
+          subject[0]  # Force constructor call here
+          output.string = ''
+          subject.push_state(dotted_rule, 3, 5, :prediction)
+          expectation = <<-SNIPPET
+|.      .      .      >      .| [3:5] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectation)
+        end
       end # context
     end # describe
   end # module

data/spec/rley/parser/earley_parser_spec.rb CHANGED

@@ -1,4 +1,5 @@
 require_relative '../../spec_helper'
+require 'stringio'
 require_relative '../../../lib/rley/syntax/verbatim_symbol'
 require_relative '../../../lib/rley/syntax/non_terminal'
@@ -226,6 +227,38 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           compare_state_texts(state_set_5, expected)
         end
+        it 'should trace a parse with level 1' do
+          # Substitute temporarily $stdout by a StringIO
+          prev_ostream = $stdout
+          $stdout = StringIO.new('', 'w')
+          trace_level = 1
+          parse_result = subject.parse(grm1_tokens, trace_level)
+          expectations = <<-SNIPPET
+['a', 'a', 'b', 'c', 'c']
+|. a . a . b . c . c .|
+|>   .   .   .   .   .| [0:0] S => . A
+|>   .   .   .   .   .| [0:0] A => . 'a' A 'c'
+|>   .   .   .   .   .| [0:0] A => . 'b'
+|[---]   .   .   .   .| [0:1] A => 'a' . A 'c'
+|.   >   .   .   .   .| [1:1] A => . 'a' A 'c'
+|.   >   .   .   .   .| [1:1] A => . 'b'
+|.   [---]   .   .   .| [1:2] A => 'a' . A 'c'
+|.   .   >   .   .   .| [2:2] A => . 'a' A 'c'
+|.   .   >   .   .   .| [2:2] A => . 'b'
+|.   .   [---]   .   .| [2:3] A => 'b' .
+|.   [------->   .   .| [1:3] A => 'a' A . 'c'
+|.   .   .   [---]   .| [3:4] A => 'a' A 'c' .
+|[--------------->   .| [0:4] A => 'a' A . 'c'
+|.   .   .   .   [---]| [4:5] A => 'a' A 'c' .
+|[===================]| [0:5] S => A .
+SNIPPET
+          expect($stdout.string).to eq(expectations)
+          # Restore standard output stream
+          $stdout = prev_ostream
+        end
         it 'should parse a valid simple expression' do
           instance = EarleyParser.new(grammar_expr)
           parse_result = instance.parse(grm2_tokens)

data/spec/rley/parser/parse_tracer_spec.rb ADDED

@@ -0,0 +1,193 @@
+require_relative '../../spec_helper'
+require 'stringio'
+require_relative '../../../lib/rley/syntax/terminal'
+require_relative '../../../lib/rley/syntax/non_terminal'
+require_relative '../../../lib/rley/syntax/production'
+require_relative '../../../lib/rley/parser/dotted_item'
+require_relative '../../../lib/rley/parser/parse_state'
+require_relative '../../../lib/rley/parser/token'
+# Load the class under test
+require_relative '../../../lib/rley/parser/parse_tracer'
+module Rley # Open this namespace to avoid module qualifier prefixes
+  module Parser # Open this namespace to avoid module qualifier prefixes
+    describe ParseTracer do
+      let(:output) { StringIO.new('', 'w') }
+      let(:token_seq) do
+        literals = ['I', 'saw', 'John', 'with', 'a', 'dog']
+        literals.map {|lexeme| Token.new(lexeme, nil)}
+      end
+      subject { ParseTracer.new(1, output, token_seq) }
+      context 'Creation & initialization:' do
+        it 'should accept trace level 0' do
+          expect { ParseTracer.new(0, output, token_seq) }.not_to raise_error
+          expect(output.string).to eq('') # Level 0: nothing gets written
+        end
+# Token ruler expected in the traces: |.  I   . saw  . John . with .  a   . dog  .|
+        it 'should accept trace level 1' do
+          expect { ParseTracer.new(1, output, token_seq) }.not_to raise_error
+          expectations = <<-SNIPPET
+['I', 'saw', 'John', 'with', 'a', 'dog']
+|.  I   . saw  . John . with .  a   . dog  .|
+SNIPPET
+          expect(output.string).to eq(expectations) # Header written at creation
+        end
+        it 'should accept trace level 2' do
+          expect { ParseTracer.new(2, output, token_seq) }.not_to raise_error
+          expectations = <<-SNIPPET
+['I', 'saw', 'John', 'with', 'a', 'dog']
+|.  I   . saw  . John . with .  a   . dog  .|
+SNIPPET
+          expect(output.string).to eq(expectations) # Same header as level 1
+        end
+        it 'should know the trace level' do
+          expect(subject.level).to eq(1) # subject is built with level 1
+        end
+        it 'should know the output stream' do
+          expect(subject.ostream).to eq(output) # The stream given at creation
+        end
+      end # context
+      context 'Provided services:' do
+        let(:t_a) { Syntax::Terminal.new('A') }
+        let(:t_b) { Syntax::Terminal.new('B') }
+        let(:t_c) { Syntax::Terminal.new('C') }
+        let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
+        let(:sample_prod) do
+          Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
+        end
+        let(:origin_val) { 3 }
+        let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
+        let(:complete_rule) { DottedItem.new(sample_prod, 3) }
+        let(:sample_parse_state) { ParseState.new(dotted_rule, origin_val) }
+        # Factory method. Takes the origin first, then the dotted rule.
+        def parse_state(origin, aDottedRule)
+          ParseState.new(aDottedRule, origin) # Reverse order w.r.t. arguments
+        end
+        it 'should render a scanning step' do
+          # Case: token at the beginning
+          subject.ostream.string = ''
+          subject.trace_scanning(1, parse_state(0, dotted_rule))
+          expectations = <<-SNIPPET
+|[------]      .      .      .      .      .| [0:1] sentence => A B . C
+SNIPPET
+          # Case: token in the middle
+          subject.ostream.string = ''
+          subject.trace_scanning(4, sample_parse_state)
+          expectations = <<-SNIPPET
+|.      .      .      [------]      .      .| [3:4] sentence => A B . C
+SNIPPET
+          # Case: token at the end
+          subject.ostream.string = ''
+          subject.trace_scanning(6, parse_state(5, dotted_rule))
+          expectations = <<-SNIPPET
+|.      .      .      .      .      [------]| [5:6] sentence => A B . C
+SNIPPET
+        end
+        it 'should render a prediction step' do
+          # Case: initial stateset ('>' marker expected in the leftmost column)
+          subject.ostream.string = '' # Discard what was written so far
+          subject.trace_prediction(0, parse_state(0, dotted_rule))
+          expectations = <<-SNIPPET
+|>      .      .      .      .      .      .| [0:0] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: stateset in the middle ('>' marker expected at position 3)
+          subject.ostream.string = ''
+          subject.trace_prediction(3, sample_parse_state)
+          expectations = <<-SNIPPET
+|.      .      .      >      .      .      .| [3:3] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: final stateset ('>' marker expected in the rightmost column)
+          subject.ostream.string = ''
+          subject.trace_prediction(6, parse_state(6, dotted_rule))
+          expectations = <<-SNIPPET
+|.      .      .      .      .      .      >| [6:6] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectations)
+        end
+        it 'should render a completion step' do
+          # Case: full parse completed (whole input spanned, drawn with '=')
+          subject.ostream.string = '' # Discard what was written so far
+          subject.trace_completion(6, parse_state(0, complete_rule))
+          expectations = <<-SNIPPET
+|[=========================================]| [0:6] sentence => A B C .
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: step at the start (complete): span expected to end with ']'
+          subject.ostream.string = ''
+          subject.trace_completion(1, parse_state(0, complete_rule))
+          expectations = <<-SNIPPET
+|[------]      .      .      .      .      .| [0:1] sentence => A B C .
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: step at the start (not complete): span expected to end with '>'
+          subject.ostream.string = ''
+          subject.trace_completion(1, parse_state(0, dotted_rule))
+          expectations = <<-SNIPPET
+|[------>      .      .      .      .      .| [0:1] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: step at the middle (complete)
+          subject.ostream.string = ''
+          subject.trace_completion(4, parse_state(2, complete_rule))
+          expectations = <<-SNIPPET
+|.      .      [-------------]      .      .| [2:4] sentence => A B C .
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: step at the middle (not complete)
+          subject.ostream.string = ''
+          subject.trace_completion(4, parse_state(2, dotted_rule))
+          expectations = <<-SNIPPET
+|.      .      [------------->      .      .| [2:4] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: step at the end (complete, but origin 3 so no '=' rendering)
+          subject.ostream.string = ''
+          subject.trace_completion(6, parse_state(3, complete_rule))
+          expectations = <<-SNIPPET
+|.      .      .      [--------------------]| [3:6] sentence => A B C .
+SNIPPET
+          expect(output.string).to eq(expectations)
+          # Case: step at the end (not complete)
+          subject.ostream.string = ''
+          subject.trace_completion(6, parse_state(3, dotted_rule))
+          expectations = <<-SNIPPET
+|.      .      .      [-------------------->| [3:6] sentence => A B . C
+SNIPPET
+          expect(output.string).to eq(expectations)
+        end
+      end # context
+    end # describe
+  end # module
+end # module
+# End of file

data/spec/rley/parser/parsing_spec.rb CHANGED

@@ -1,4 +1,5 @@
 require_relative '../../spec_helper'
+require 'stringio'
 require_relative '../../../lib/rley/syntax/non_terminal'
 require_relative '../../../lib/rley/syntax/verbatim_symbol'
@@ -6,6 +7,7 @@ require_relative '../../../lib/rley/syntax/production'
 require_relative '../../../lib/rley/syntax/grammar_builder'
 require_relative '../../../lib/rley/parser/dotted_item'
 require_relative '../../../lib/rley/parser/token'
+require_relative '../../../lib/rley/parser/parse_tracer'
 require_relative '../../../lib/rley/parser/earley_parser'
 require_relative '../support/grammar_abc_helper'
 require_relative '../support/grammar_b_expr_helper'
@@ -48,15 +50,18 @@ module Rley # Open this namespace to avoid module qualifier prefixes
       let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
+      let(:output) { StringIO.new('', 'w') }
+      let(:sample_tracer) { ParseTracer.new(0, output, grm1_tokens) }
       # Default instantiation rule
-      subject { Parsing.new(start_dotted_rule, grm1_tokens) }
+      subject { Parsing.new(start_dotted_rule, grm1_tokens, sample_tracer) }
       context 'Initialization:' do
-        it 'should be created with list of tokens and start dotted rule' do
+        it 'should be created with list of tokens, start dotted rule, trace' do
           start_rule = start_dotted_rule
           tokens = grm1_tokens
-          expect { Parsing.new(start_rule, tokens) }.not_to raise_error
+          tracer = sample_tracer
+          expect { Parsing.new(start_rule, tokens, tracer) }.not_to raise_error
         end
         it 'should know the input tokens' do
@@ -66,6 +71,17 @@ module Rley # Open this namespace to avoid module qualifier prefixes
         it 'should know its chart object' do
           expect(subject.chart).to be_kind_of(Chart)
         end
+        it 'should emit trace level 1 info' do
+          tracer = ParseTracer.new(1, output, grm1_tokens)
+          instance = Parsing.new(start_dotted_rule, grm1_tokens, tracer)
+          expectations = <<-SNIPPET
+['a', 'a', 'b', 'c', 'c']
+|. a . a . b . c . c .|
+|>   .   .   .   .   .| [0:0] S => . A
+SNIPPET
+          expect(output.string).to eq(expectations)
+        end
       end # context
       context 'Parsing:' do
@@ -73,27 +89,27 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           expect(subject.chart[1]).to be_empty
           item = DottedItem.new(prod_A1, 1)
-          subject.push_state(item, 1, 1)
+          subject.push_state(item, 1, 1, :scanning)
           expect(subject.chart[1]).not_to be_empty
           expect(subject.chart[1].first.dotted_rule).to eq(item)
           # Pushing twice the same state must be no-op
-          subject.push_state(item, 1, 1)
+          subject.push_state(item, 1, 1, :scanning)
           expect(subject.chart[1].size).to eq(1)
         end
         it 'should complain when trying to push a nil dotted item' do
           err = StandardError
           msg = 'Dotted item may not be nil'
-          expect { subject.push_state(nil, 1, 1) }.to raise_error(err, msg)
+          expect{ subject.push_state(nil, 1, 1, :prediction) }.to raise_error(err, msg)
         end
         it 'should retrieve the parse states that expect a given terminal' do
           item1 = DottedItem.new(prod_A1, 2)
           item2 = DottedItem.new(prod_A1, 1)
-          subject.push_state(item1, 2, 2)
-          subject.push_state(item2, 2, 2)
+          subject.push_state(item1, 2, 2, :scanning)
+          subject.push_state(item2, 2, 2, :scanning)
           states = subject.states_expecting(c_, 2, false)
           expect(states.size).to eq(1)
           expect(states[0].dotted_rule).to eq(item1)
@@ -106,8 +122,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           item1 = DottedItem.new(prod_A1, 0)
           item2 = DottedItem.new(prod_A2, 0)
-          subject.push_state(item1, 0, 0)
-          subject.push_state(item2, 0, 0)
+          subject.push_state(item1, 0, 0, :completion)
+          subject.push_state(item2, 0, 0, :completion)
           subject.scanning(a_, 0) { |i| i } # Code block is mock
           # Expected side effect: a new state at chart[1]
@@ -117,7 +133,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
           expect(new_state.origin).to eq(0)
         end
       end # context
+=begin
       context 'Parse tree building:' do
         let(:sample_grammar1) do
           builder = grammar_abc_builder
@@ -385,6 +401,7 @@ SNIPPET
           expect(actual).to eq(expected_text.chomp)
         end
       end # context
+=end
     end # describe
   end # module
 end # module

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rley
 version: !ruby/object:Gem::Version
-  version: 0.2.03
+  version: 0.2.04
 platform: ruby
 authors:
 - Dimitri Geshef
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-02-06 00:00:00.000000000 Z
+date: 2015-03-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -80,7 +80,7 @@ dependencies:
     - - ! '>='
       - !ruby/object:Gem::Version
         version: 2.0.0
-description: A Ruby implementation of the Earley's parsing algorithm
+description: A general parser using the Earley algorithm.
 email: famished.tiger@yahoo.com
 executables: []
 extensions: []
@@ -104,8 +104,11 @@ files:
 - examples/parsers/parsing_abc.rb
 - examples/parsers/parsing_ambig.rb
 - examples/parsers/parsing_b_expr.rb
+- examples/parsers/parsing_err_expr.rb
+- examples/parsers/parsing_groucho.rb
 - examples/parsers/parsing_L0.rb
 - examples/parsers/parsing_L1.rb
+- examples/parsers/parsing_tricky.rb
 - examples/recognizers/recognizer_abc.rb
 - lib/rley.rb
 - lib/rley/constants.rb
@@ -117,6 +120,7 @@ files:
 - lib/rley/parser/earley_parser.rb
 - lib/rley/parser/parse_state.rb
 - lib/rley/parser/parse_state_tracker.rb
+- lib/rley/parser/parse_tracer.rb
 - lib/rley/parser/parse_tree_builder.rb
 - lib/rley/parser/parsing.rb
 - lib/rley/parser/state_set.rb
@@ -142,6 +146,7 @@ files:
 - spec/rley/parser/dotted_item_spec.rb
 - spec/rley/parser/earley_parser_spec.rb
 - spec/rley/parser/parse_state_spec.rb
+- spec/rley/parser/parse_tracer_spec.rb
 - spec/rley/parser/parse_tree_builder_spec.rb
 - spec/rley/parser/parsing_spec.rb
 - spec/rley/parser/state_set_spec.rb
@@ -203,6 +208,7 @@ test_files:
 - spec/rley/parser/dotted_item_spec.rb
 - spec/rley/parser/earley_parser_spec.rb
 - spec/rley/parser/parse_state_spec.rb
+- spec/rley/parser/parse_tracer_spec.rb
 - spec/rley/parser/parse_tree_builder_spec.rb
 - spec/rley/parser/parsing_spec.rb
 - spec/rley/parser/state_set_spec.rb