RubyGems - rley - Versions diffs - 0.4.01 → 0.4.02 - Mend

rley 0.4.01 → 0.4.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (56) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/LICENSE.txt +2 -2
data/README.md +3 -3
data/examples/NLP/mini_en_demo.rb +1 -1
data/examples/data_formats/JSON/JSON_demo.rb +1 -0
data/examples/data_formats/JSON/JSON_lexer.rb +4 -4
data/examples/general/calc/calc_lexer.rb +2 -2
data/lib/rley.rb +1 -1
data/lib/rley/constants.rb +1 -1
data/lib/rley/formatter/debug.rb +2 -2
data/lib/rley/formatter/json.rb +4 -4
data/lib/rley/parse_tree_visitor.rb +9 -9
data/lib/rley/parser/base_parser.rb +1 -1
data/lib/rley/parser/gfg_parsing.rb +9 -0
data/lib/rley/parser/parse_tree_builder.rb +176 -126
data/lib/rley/parser/parse_tree_factory.rb +57 -0
data/lib/rley/ptree/non_terminal_node.rb +10 -9
data/lib/rley/ptree/parse_tree_node.rb +10 -5
data/lib/rley/ptree/terminal_node.rb +14 -6
data/lib/rley/sppf/sppf_node.rb +2 -2
data/lib/rley/{parser → tokens}/token.rb +1 -4
data/lib/rley/{ptree → tokens}/token_range.rb +1 -1
data/spec/rley/formatter/debug_spec.rb +16 -16
data/spec/rley/formatter/json_spec.rb +8 -8
data/spec/rley/parse_forest_visitor_spec.rb +1 -1
data/spec/rley/parse_tree_visitor_spec.rb +28 -28
data/spec/rley/parser/error_reason_spec.rb +3 -3
data/spec/rley/parser/gfg_chart_spec.rb +2 -2
data/spec/rley/parser/gfg_earley_parser_spec.rb +2 -2
data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
data/spec/rley/parser/groucho_spec.rb +1 -1
data/spec/rley/parser/parse_tracer_spec.rb +2 -2
data/spec/rley/parser/parse_tree_builder_spec.rb +213 -140
data/spec/rley/parser/parse_tree_factory_spec.rb +85 -0
data/spec/rley/parser/parse_walker_factory_spec.rb +11 -10
data/spec/rley/ptree/non_terminal_node_spec.rb +23 -20
data/spec/rley/ptree/terminal_node_spec.rb +7 -12
data/spec/rley/sppf/alternative_node_spec.rb +2 -2
data/spec/rley/sppf/non_terminal_node_spec.rb +2 -2
data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
data/spec/rley/support/expectation_helper.rb +1 -1
data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
data/spec/rley/support/grammar_helper.rb +3 -3
data/spec/rley/support/grammar_l0_helper.rb +2 -2
data/spec/rley/support/grammar_pb_helper.rb +2 -2
data/spec/rley/{ptree → tokens}/token_range_spec.rb +2 -2
data/spec/rley/{parser → tokens}/token_spec.rb +2 -2
metadata +11 -17
data/lib/rley/parser/chart.rb +0 -82
data/lib/rley/parser/earley_parser.rb +0 -203
data/lib/rley/parser/parsing.rb +0 -265
data/spec/rley/parser/chart_spec.rb +0 -120
data/spec/rley/parser/earley_parser_spec.rb +0 -710
data/spec/rley/parser/parsing_spec.rb +0 -408

data/lib/rley/parser/chart.rb DELETED

@@ -1,82 +0,0 @@
-require_relative 'state_set'
-require_relative 'parse_state'
-module Rley # This module is used as a namespace
-  module Parser # This module is used as a namespace
-    # Also called a parse table
-    # A one-dimensional array with n + 1 entries (n = number of input tokens).
-    class Chart
-      # An array of state sets (one per input token + 1)
-      attr_reader(:state_sets)
-      # The level of trace details reported on stdout during the parse.
-      # The possible values are:
-      # 0: No trace output (default case)
-      # 1: Show trace of scanning and completion rules
-      # 2: Same as 1, with the addition of the prediction rules
-      attr_reader(:tracer)
-      # @param startItems [Array] A non-empty Array of dotted items for
-      #   the start symbol.
-      # @param tokenCount [Fixnum] The number of lexemes in the input to parse.
-      # @param aTracer [ParseTracer] A tracer object.
-      def initialize(startItems, tokenCount, aTracer)
-        @tracer = aTracer
-        @state_sets = Array.new(tokenCount + 1) { |_| StateSet.new }
-        startItems.each do |startDottedItem|
-          push_state(startDottedItem, 0, 0, :start_rule)
-        end
-      end
-      # The dotted item/rule used to seed the parse chart.
-      # It corresponds to the start production and a dot placed
-      # at the beginning of the rhs
-      def start_dotted_rule()
-        return self[0].states.first.dotted_rule
-      end
-      # Return the start (non-terminal) symbol of the grammar.
-      def start_symbol()
-        return state_sets.first.states[0].dotted_rule.lhs
-      end
-      # Access the state set at given position
-      def [](index)
-        return state_sets[index]
-      end
-      # Return the index value of the last non-empty state set.
-      def last_index()
-        first_empty = state_sets.find_index(&:empty?)
-        index = if first_empty.nil?
-                  state_sets.size - 1
-                else
-                  first_empty.zero? ? 0 : first_empty - 1
-                end
-        return index
-      end
-      # Push a parse state for the chart entry with given index
-      def push_state(aDottedItem, anOrigin, anIndex, aReason)
-        new_state = ParseState.new(aDottedItem, anOrigin)
-        pushed = self[anIndex].push_state(new_state)
-        return unless pushed && tracer.level > 0
-        case aReason
-          when :start_rule, :prediction
-            tracer.trace_prediction(anIndex, new_state)
-          when :scanning
-             tracer.trace_scanning(anIndex, new_state)
-          when :completion
-             tracer.trace_completion(anIndex, new_state)
-          else
-            raise NotImplementedError, "Unknown push_state mode #{aReason}"
-        end
-      end
-    end # class
-  end # module
-end # module
-# End of file

data/lib/rley/parser/earley_parser.rb DELETED

@@ -1,203 +0,0 @@
-require_relative 'base_parser'
-module Rley # This module is used as a namespace
-  module Parser # This module is used as a namespace
-    # Implementation of a parser that uses the Earley parsing algorithm.
-    class EarleyParser < BaseParser
-      # A Hash that defines the mapping: non-terminal => [start dotted items]
-      attr_reader(:start_mapping)
-      # A Hash that defines the mapping: dotted item => next dotted item
-      # In other words, the 'next_mapping' allows one to find the dotted item
-      # after "advancing" the dot
-      attr_reader(:next_mapping)
-      def initialize(aGrammar)
-        super(aGrammar)
-        @start_mapping = build_start_mapping(dotted_items)
-        @next_mapping = build_next_mapping(dotted_items)
-      end
-      # Parse a sequence of input tokens.
-      # @param aTokenSequence [Array] Array of Token objects returned by a
-      # tokenizer/scanner/lexer.
-      # @param aTraceLevel [Fixnum] The specified trace level.
-      # The possible values are:
-      # 0: No trace output (default case)
-      # 1: Show trace of scanning and completion rules
-      # 2: Same as 1, with the addition of the prediction rules
-      # @return [Parsing] an object that embeds the parse results.
-      def parse(aTokenSequence, aTraceLevel = 0)
-        tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
-        result = Parsing.new(start_dotted_items, aTokenSequence, tracer)
-        last_token_index = aTokenSequence.size
-        (0..last_token_index).each do |i|
-          handle_error(result) if result.chart[i].empty?
-          predicted = Set.new
-          result.chart[i].each do |state|
-            if state.complete? # End of production reached?
-              completion(result, state, i, tracer)
-            else
-              next_symbol = state.next_symbol
-              if next_symbol.kind_of?(Syntax::NonTerminal)
-                unless predicted.include? next_symbol
-                  prediction(result, state, next_symbol, i, tracer)
-                  predicted << next_symbol # Avoid repeated predictions
-                end
-              elsif i < last_token_index
-                # Expecting a terminal symbol
-                scanning(result, next_symbol, i, tracer)
-              end
-            end
-          end
-        end
-        return result
-      end
-      private
-      # Create a Hash with pairs of the kind:
-      # non-terminal => [start dotted items]
-      def build_start_mapping(theDottedItems)
-        mapping = {}
-        theDottedItems.each do |item|
-          next unless item.at_start?
-          lhs_symbol = item.lhs
-          map_entry = mapping.fetch(lhs_symbol, [])
-          map_entry << item
-          mapping[lhs_symbol] = map_entry
-        end
-        return mapping
-      end
-      # Create a Hash with pairs of the kind:
-      # dotted item => next dotted item
-      # next dotted item uses same production and the dot
-      # position is advanced by one symbol
-      def build_next_mapping(theDottedItems)
-        mapping = {}
-        theDottedItems.each_cons(2) do |(item1, item2)|
-          next if item1.production != item2.production
-          mapping[item1] = item2
-        end
-        return mapping
-      end
-      # The dotted item for the start production and
-      # with the dot at the beginning of the rhs
-      def start_dotted_items()
-        start_symbol = grammar.start_symbol
-        start_items = dotted_items.select do |anItem|
-          (anItem.lhs == start_symbol) && anItem.at_start?
-        end
-        return start_items
-      end
-      # This method is called when a parse state for chart entry at position
-      # 'pos' expects as next symbol a non-terminal.
-      # Given a predicted non-terminal 'nt' and a current token position
-      # 'pos':
-      # For each production with 'nt' as lhs, retrieve their corresponding
-      # initial dotted rules nt -> . xxxx
-      # For retrieved dotted rule, add a parse state to the chart entry
-      # at 'pos': <initial dotted rule, pos, pos>
-      # In short, one adds states to chart[pos], one per production that
-      # specifies how to reduce some input into the predicted nt (non-terminal)
-      # A prediction corresponds to a potential expansion of a nonterminal
-      # in a left-most derivation.
-      # @param aParsing [Parsing] the object that encapsulates the
-      #   results of the parsing process
-      # @param aState [ParseState] current parse state being processed
-      # @param aNonTerminal [NonTerminal] a non-terminal symbol that
-      #   immediately follows a dot
-      #   (= is expected/predicted by the production rule)
-      # @param aPosition [Fixnum] position in the input token sequence.
-      def prediction(aParsing, aState, aNonTerminal, aPosition, aTracer)
-        if aTracer.level > 1
-          puts "Chart[#{aPosition}] Prediction(s) from #{aState}:"
-        end
-        # Retrieve all start dotted items for productions
-        # with aNonTerminal as its lhs
-        items = start_mapping[aNonTerminal]
-        items.each do |an_item|
-          aParsing.push_state(an_item, aPosition, aPosition, :prediction)
-        end
-        return unless aNonTerminal.nullable?
-        # Aycock-Horspool trick for nullable rules
-        next_item = next_mapping[aState.dotted_rule]
-        aParsing.push_state(next_item, aState.origin, aPosition, :prediction)
-      end
-      # This method is called when a parse state for chart entry at position
-      # 'pos' expects a terminal as next symbol.
-      # If the input token matches the terminal symbol then:
-      # Retrieve all parse states for chart entry at 'aPosition'
-      # that have the given terminal as next symbol.
-      # For each s of the above states, push to chart entry aPosition + 1
-      # a new state like: <next dotted rule, s.origin, aPosition + 1>
-      # In other words, we place the dotted rules in the next state set
-      # such that the dot appears after terminal.
-      # @param aParsing [Parsing] the object that encapsulates the
-      #   results of the parsing process
-      # @param aTerminal [Terminal] a terminal symbol that
-      #   immediately follows a dot
-      # @param aPosition [Fixnum] position in the input token sequence.
-      def scanning(aParsing, aTerminal, aPosition, aTracer)
-        if aTracer.level > 1
-          prefix = "Chart[#{aPosition}] Scanning of terminal "
-          suffix = "#{aTerminal.name}:"
-          puts prefix + suffix
-        end
-        aParsing.scanning(aTerminal, aPosition) do |item|
-          next_mapping[item]
-        end
-      end
-      # This method is called when a parse state at chart entry reaches
-      # the end of a production.
-      # For every state in chart[aPosition] that is
-      # complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
-      # Find states s in chart[j] of the
-      #  form { dotted_rule: Y -> α • X β, origin: i}
-      #   In other words, rules that predicted the non-terminal X.
-      # For each s, add to chart[aPosition] a state of the form
-      #   { dotted_rule: Y → α X • β, origin: i})
-      def completion(aParsing, aState, aPosition, aTracer)
-        if aTracer.level > 1
-          puts "Chart[#{aPosition}] Completion of state #{aState}:"
-        end
-        aParsing.completion(aState, aPosition) do |item|
-          next_mapping[item]
-        end
-      end
-      # Raise an exception to indicate a syntax error.
-      def handle_error(aParsing)
-        # Retrieve the first empty state set
-        pos = aParsing.chart.state_sets.find_index(&:empty?)
-        lexeme_at_pos = aParsing.tokens[pos - 1].lexeme
-        terminals = aParsing.chart.state_sets[pos - 1].expected_terminals
-        term_names = terminals.map(&:name)
-        err_msg = "Syntax error at or near token #{pos}"
-        err_msg << ">>>#{lexeme_at_pos}<<<:\nExpected "
-        err_msg << if terminals.size > 1
-                     "one of: ['#{term_names.join("', '")}'],"
-                   else
-                     ": #{term_names[0]},"
-                   end
-        err_msg << " found a '#{aParsing.tokens[pos - 1].terminal.name}'"
-        raise StandardError, err_msg + ' instead.'
-      end
-    end # class
-  end # module
-end # module
-# End of file

data/lib/rley/parser/parsing.rb DELETED

@@ -1,265 +0,0 @@
-require_relative 'chart'
-require_relative 'parse_state_tracker'
-require_relative 'parse_tree_builder'
-module Rley # This module is used as a namespace
-  module Parser # This module is used as a namespace
-    class Parsing
-      attr_reader(:chart)
-      # The sequence of input tokens to parse
-      attr_reader(:tokens)
-      # @param aTracer [ParseTracer] An object that traces the parsing.
-      # The possible values are:
-      # 0: No trace output (default case)
-      # 1: Show trace of scanning and completion rules
-      # 2: Same as 1, with the addition of the prediction rules
-      def initialize(startDottedRules, theTokens, aTracer)
-        @tokens = theTokens.dup
-        @chart = Chart.new(startDottedRules, tokens.size, aTracer)
-      end
-      # Return true if the parse was successful (= input tokens
-      # followed the syntax specified by the grammar)
-      def success?()
-        # Success can be detected as follows:
-        # The last chart entry has at least one complete parse state
-        # for the start symbol with an origin == 0
-        last_chart_entry = chart.state_sets[-1]
-        start_symbol = chart.start_symbol
-        # Retrieve all the complete states with start symbol in lhs
-        end_states = last_chart_entry.states_rewriting(start_symbol)
-        success_states = end_states.select { |st| st.origin.zero? }
-        return !success_states.empty?
-      end
-      # Return true if there is more than one complete state
-      # for the same lhs and same origin in any state set.
-      def ambiguous?()
-        found = chart.state_sets.find { |set| !set.ambiguities.empty? }
-        return !found.nil?
-      end
-      # Factory method. Builds a ParseTree from the parse result.
-      # @return [ParseTree]
-      # Algorithm:
-      # set state_set_index = index of last state set in chart
-      # Search the completed parse state that corresponds to the full parse
-      def parse_tree()
-        state_tracker = new_state_tracker
-        builder = tree_builder(state_tracker.state_set_index)
-        loop do
-          state_tracker.symbol_on_left
-          # match_symbol = state_tracker.symbol_on_left
-          # puts '--------------------'
-          # puts "Active parse state: #{state_tracker.parse_state}"
-          # puts "Matching symbol: #{match_symbol}"
-          # puts 'Parse tree:'
-          # puts builder.root.to_string(0)
-          # Place the symbol on left of the dot in the parse tree
-          done = insert_matched_symbol(state_tracker, builder)
-          break if done
-        end
-        return builder.parse_tree
-      end
-      # Push a parse state (dotted item + origin) to the
-      # chart entry with given index if it isn't yet in the chart entry.
-      def push_state(aDottedItem, anOrigin, aChartIndex, aReason)
-        raise StandardError, 'Dotted item may not be nil' if aDottedItem.nil?
-        chart.push_state(aDottedItem, anOrigin, aChartIndex, aReason)
-      end
-      # This method is called when a parse state for chart entry at position
-      # 'pos' expects a terminal as next symbol.
-      # If the input token matches the terminal symbol then:
-      # Retrieve all parse states for chart entry at 'aPosition'
-      # that have the given terminal as next symbol.
-      # For each s of the above states, push to chart entry aPosition + 1
-      # a new state like: <next dotted rule, s.origin, aPosition + 1>
-      # In other words, we place the dotted rules in the next state set
-      # such that the dot appears after terminal.
-      # @param aTerminal [Terminal] a terminal symbol that
-      #   immediately follows a dot
-      # @param aPosition [Fixnum] position in the input token sequence.
-      # @param nextMapping [Proc or Lambda] code to evaluate in order to
-      #   determine the "next" dotted rule for a given one.
-      def scanning(aTerminal, aPosition, &_nextMapping)
-        curr_token = tokens[aPosition]
-        return unless curr_token.terminal == aTerminal
-        states = states_expecting(aTerminal, aPosition, false)
-        states.each do |s|
-          next_item = yield s.dotted_rule
-          push_state(next_item, s.origin, aPosition + 1, :scanning)
-        end
-      end
-      # This method is called when a parse state at chart entry reaches the end
-      # of a production.
-      # For every state in chart[aPosition] that is complete
-      #  (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
-      # Find states s in chart[j] of the form
-      #  {dotted_rule: Y -> α • X β, origin: i}
-      #  In other words, rules that predicted the non-terminal X.
-      # For each s, add to chart[aPosition] a state of the form
-      #  { dotted_rule: Y → α X • β, origin: i})
-      def completion(aState, aPosition, &_nextMapping)
-        curr_origin = aState.origin
-        curr_lhs = aState.dotted_rule.lhs
-        states = states_expecting(curr_lhs, curr_origin, false)
-        states.each do |s|
-          next_item = yield s.dotted_rule
-          push_state(next_item, s.origin, aPosition, :completion)
-        end
-      end
-      # The list of ParseState from the chart entry at given position
-      # that expect the given terminal
-      def states_expecting(aTerminal, aPosition, toSort)
-        expecting = chart[aPosition].states_expecting(aTerminal)
-        return expecting if !toSort || expecting.size < 2
-        # Put predicted states ahead
-        (predicted, others) = expecting.partition(&:predicted?)
-        # Sort state in reverse order of their origin value
-        [predicted, others].each do |set|
-          set.sort! { |a, b| b.origin <=> a.origin }
-        end
-        return predicted + others
-      end
-      # Retrieve the parse state(s) that represents a complete, successful parse
-      # After a successful parse, the last chart entry
-      # has a parse state that involves the start symbol and
-      # has a dot positioned at the end of its rhs.
-      def end_parse_states()
-        last_chart_entry = chart.state_sets[-1]
-        start_symbol = chart.start_symbol
-        # Retrieve all the complete states with origin at 0
-        end_states = last_chart_entry.states_rewriting(start_symbol)
-        return end_states
-      end
-      # Insert in a parse tree the symbol on the left of the
-      # current dotted rule.
-      def insert_matched_symbol(aStateTracker, aBuilder)
-        # Retrieve symbol before the dot in active parse state
-        match_symbol = aStateTracker.symbol_on_left
-        # Retrieve tree node being processed
-        tree_node = aBuilder.current_node
-        done = false
-        case [match_symbol.class, tree_node.class]
-          when [Syntax::Terminal, PTree::TerminalNode]
-            aStateTracker.to_prev_state_set
-            predecessor_state_terminal(match_symbol, aStateTracker, aBuilder)
-          when [NilClass, Rley::PTree::TerminalNode],
-            [NilClass, PTree::NonTerminalNode]
-            # Retrieve all parse states that expect the lhs
-            new_states = states_expecting_lhs(aStateTracker, aBuilder)
-            done = true if new_states.empty?
-            # Select an unused parse state
-            aStateTracker.select_state(new_states)
-          when [Syntax::NonTerminal, PTree::NonTerminalNode]
-            completed_state_for(match_symbol, aStateTracker, aBuilder)
-        end
-        done ||= aBuilder.root == aBuilder.current_node
-        return done
-      end
-      private
-      # Factory method. Creates and initializes a ParseStateTracker instance.
-      def new_state_tracker()
-        instance = ParseStateTracker.new(chart.last_index)
-        instance.parse_state = end_parse_states.first
-        return instance
-      end
-      # A terminal symbol is on the left of dot.
-      # Go to the predecessor state for the given terminal
-      def predecessor_state_terminal(_a_symb, aStateTracker, aTreeBuilder)
-        index = aStateTracker.state_set_index
-        aTreeBuilder.current_node.range = { low: index, high: index + 1 }
-        link_node_to_token(aTreeBuilder, aStateTracker.state_set_index)
-        unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
-          raise StandardError, 'Expected terminal node'
-        end
-        aTreeBuilder.move_back
-        state_set = chart[aStateTracker.state_set_index]
-        previous_state = state_set.predecessor_state(aStateTracker.parse_state)
-        aStateTracker.parse_state = previous_state
-      end
-      # Retrieve a complete state with the given non-terminal symbol as lhs.
-      def completed_state_for(a_symb, aTracker, aTreeBuilder)
-        new_states = chart[aTracker.state_set_index].states_rewriting(a_symb)
-        aTracker.select_state(new_states)
-        aTreeBuilder.range = { high: aTracker.state_set_index }
-        aTreeBuilder.use_complete_state(aTracker.parse_state)
-        link_node_to_token(aTreeBuilder, aTracker.state_set_index - 1)
-        aTreeBuilder.move_down
-      end
-      def states_expecting_lhs(aStateTracker, aTreeBuilder)
-        lhs = aStateTracker.curr_dotted_item.production.lhs
-        new_states = states_expecting(lhs, aStateTracker.state_set_index, true)
-        new_states.reject! { |st| st == aStateTracker.parse_state }
-        # Filter out parse states with incompatible range
-        if new_states.size > 1
-          previous_node = aTreeBuilder.current_path[-3]
-          new_states.select! do |parse_state|
-            parse_state.dotted_rule.production.lhs == previous_node.symbol
-          end
-        end
-        return new_states
-      end
-      # If the current node is a terminal node
-      # then link the token to that node
-      def link_node_to_token(aTreeBuilder, aStateSetIndex)
-        return unless aTreeBuilder.current_node.is_a?(PTree::TerminalNode)
-        return unless aTreeBuilder.current_node.token.nil?
-        a_node = aTreeBuilder.current_node
-        a_node.token = tokens[aStateSetIndex] unless a_node.token
-      end
-      # Factory method. Initializes a ParseTreeBuilder object
-      def tree_builder(anIndex)
-        full_range = { low: 0, high: anIndex }
-        start_production = chart.start_dotted_rule.production
-        return ParseTreeBuilder.new(start_production, full_range)
-      end
-    end # class
-  end # module
-end # module
-# End of file