RubyGems - rley - Versions diffs - 0.7.08 → 0.8.03 - Mend

rley 0.7.08 → 0.8.03

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (72) hide show

checksums.yaml +4 -4
data/.rubocop.yml +29 -5
data/CHANGELOG.md +28 -4
data/README.md +4 -5
data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
data/examples/data_formats/JSON/json_demo.rb +1 -2
data/examples/data_formats/JSON/json_grammar.rb +11 -11
data/examples/general/calc_iter1/calc_grammar.rb +5 -4
data/examples/general/calc_iter2/calc_grammar.rb +9 -9
data/examples/general/left.rb +1 -1
data/examples/general/right.rb +1 -1
data/lib/rley/base/dotted_item.rb +5 -0
data/lib/rley/base/grm_items_builder.rb +6 -0
data/lib/rley/constants.rb +1 -1
data/lib/rley/engine.rb +2 -2
data/lib/rley/interface.rb +16 -0
data/lib/rley/notation/all_notation_nodes.rb +4 -0
data/lib/rley/notation/ast_builder.rb +185 -0
data/lib/rley/notation/ast_node.rb +44 -0
data/lib/rley/notation/ast_visitor.rb +115 -0
data/lib/rley/notation/grammar.rb +49 -0
data/lib/rley/notation/grammar_builder.rb +505 -0
data/lib/rley/notation/grouping_node.rb +23 -0
data/lib/rley/notation/parser.rb +56 -0
data/lib/rley/notation/sequence_node.rb +35 -0
data/lib/rley/notation/symbol_node.rb +29 -0
data/lib/rley/notation/tokenizer.rb +180 -0
data/lib/rley/parse_rep/ast_base_builder.rb +44 -0
data/lib/rley/parser/gfg_chart.rb +101 -6
data/lib/rley/parser/gfg_earley_parser.rb +1 -1
data/lib/rley/parser/gfg_parsing.rb +5 -3
data/lib/rley/parser/parse_entry_set.rb +1 -1
data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +53 -15
data/lib/rley/syntax/grm_symbol.rb +1 -1
data/lib/rley/syntax/match_closest.rb +43 -0
data/lib/rley/syntax/production.rb +6 -0
data/lib/rley.rb +1 -1
data/spec/rley/engine_spec.rb +6 -6
data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
data/spec/rley/notation/grammar_builder_spec.rb +302 -0
data/spec/rley/notation/parser_spec.rb +183 -0
data/spec/rley/notation/tokenizer_spec.rb +364 -0
data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
data/spec/rley/parse_rep/groucho_spec.rb +1 -1
data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
data/spec/rley/parser/dangling_else_spec.rb +447 -0
data/spec/rley/parser/gfg_earley_parser_spec.rb +118 -10
data/spec/rley/parser/gfg_parsing_spec.rb +2 -1
data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
data/spec/rley/support/grammar_abc_helper.rb +2 -2
data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
data/spec/rley/support/grammar_l0_helper.rb +2 -2
data/spec/rley/support/grammar_pb_helper.rb +2 -2
data/spec/rley/support/grammar_sppf_helper.rb +2 -2
data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +29 -11
data/spec/rley/syntax/match_closest_spec.rb +46 -0
data/spec/rley/syntax/production_spec.rb +4 -0
metadata +29 -14
data/lib/rley/parser/parse_state.rb +0 -78
data/lib/rley/parser/parse_state_tracker.rb +0 -59
data/lib/rley/parser/state_set.rb +0 -100
data/spec/rley/parser/parse_state_spec.rb +0 -125
data/spec/rley/parser/parse_tracer_spec.rb +0 -200
data/spec/rley/parser/state_set_spec.rb +0 -130

data/lib/rley/notation/grouping_node.rb ADDED Viewed

@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+require_relative 'sequence_node'
+module Rley
+  module Notation
+    # A syntax node representing an expression bracketed by parentheses.
+    class GroupingNode < SequenceNode
+      # @param aPosition [Rley::Lexical::Position] Start position.
+      # @param sequence [Array<ASTNode>] sequence of AST nodes
+      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
+      def initialize(aPosition, sequence, theRepetition = nil)
+        super(aPosition, sequence, theRepetition)
+      end
+      # Part of the 'visitee' role in Visitor design pattern.
+      # @param visitor [Notation::ASTVisitor] the visitor
+      def accept(visitor)
+        visitor.visit_grouping_node(self)
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/notation/parser.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+require_relative 'tokenizer'
+require_relative 'grammar'
+require_relative 'ast_builder'
+module Rley
+  module Notation
+    # A parser for the Rley notation language (it loads the RGN grammar).
+    # It is configured with Notation::ASTBuilder as representation builder,
+    # rather than relying on concrete parse trees.
+    # Concrete parse trees are the default kind of parse tree
+    # generated by the Rley library.
+    # They consist of two node types only:
+    # - NonTerminalNode
+    # - TerminalNode
+    # A NonTerminalNode has zero or more child nodes (called subnodes).
+    # A TerminalNode is a leaf node, that is, it has no child node.
+    # While concrete parse tree nodes can be generated out of the box,
+    # they have the following drawbacks:
+    # - Generic node classes that aren't always suited for the needs of
+    #     the language being processed.
+    # - Concrete parse trees tend to be deeply nested, which may complicate
+    #   further processing.
+    class Parser
+      # @return [Rley::Engine] A facade object for the Rley parsing library
+      attr_reader(:engine)
+      def initialize
+        # Create a Rley facade object
+        @engine = Rley::Engine.new do |cfg|
+          cfg.diagnose = true
+          cfg.repr_builder = Notation::ASTBuilder
+        end
+        # Step 1. Load RGN grammar
+        @engine.use_grammar(Rley::Notation::RGNGrammar)
+      end
+      # Parse the given Lox program into a parse tree.
+      # @param source [String] Lox program to parse
+      # @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
+      def parse(source)
+        lexer = Tokenizer.new(source)
+        result = engine.parse(lexer.tokens)
+        unless result.success?
+          # Stop if the parse failed...
+          line1 = "Parsing failed\n"
+          line2 = "Reason: #{result.failure_reason.message}"
+          raise SyntaxError, line1 + line2
+        end
+        return engine.convert(result) # engine.to_ptree(result)
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/notation/sequence_node.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# frozen_string_literal: true
+require_relative 'ast_node'
+module Rley
+  module Notation
+    # A syntax node for a sequence of AST nodes
+    class SequenceNode < ASTNode
+      # @return [Array<ASTNode>]
+      attr_reader :subnodes
+      attr_accessor :constraints
+      # @param aPosition [Rley::Lexical::Position] Start position.
+      # @param sequence [Array<ASTNode>] sequence of AST nodes
+      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
+      def initialize(aPosition, sequence, theRepetition = nil)
+        super(aPosition)
+        @subnodes = sequence
+        self.repetition = theRepetition if theRepetition
+        @constraints = []
+      end
+      def size
+        subnodes.size
+      end
+      # Part of the 'visitee' role in Visitor design pattern.
+      # @param visitor [Notation::ASTVisitor] the visitor
+      def accept(visitor)
+        visitor.visit_sequence_node(self)
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/notation/symbol_node.rb ADDED Viewed

@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+require_relative 'ast_node'
+module Rley
+  module Notation
+    # A syntax node for a grammar symbol occurring in rhs of a rule
+    class SymbolNode < ASTNode
+      # @return [String] name of grammar symbol
+      attr_reader :name
+      # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
+      # @param aName [String] name of grammar symbol
+      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
+      def initialize(aPosition, aName, theRepetition = nil)
+        super(aPosition)
+        @name = aName
+        self.repetition = theRepetition if theRepetition
+      end
+      # Abstract method (must be overriden in subclasses).
+      # Part of the 'visitee' role in Visitor design pattern.
+      # @param _visitor [LoxxyTreeVisitor] the visitor
+      def accept(visitor)
+        visitor.visit_symbol_node(self)
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/notation/tokenizer.rb ADDED Viewed

@@ -0,0 +1,180 @@
+# frozen_string_literal: true
+require 'strscan'
+require_relative '../lexical/token'
+module Rley
+  module Notation
+    # A tokenizer for the Rley notation language.
+    # Responsibility: break input into a sequence of token objects.
+    # The tokenizer should recognize:
+    # Identifiers,
+    # Number literals including single digit
+    # String literals (quote delimited)
+    # Delimiters: e.g. parentheses '(',  ')'
+    # Separators: e.g. comma
+    class Tokenizer
+      # @return [StringScanner] Low-level input scanner
+      attr_reader(:scanner)
+      # @return [Integer] The current line number
+      attr_reader(:lineno)
+      # @return [Integer] Position of last start of line in the input
+      attr_reader(:line_start)
+      # Lookup table: lexeme of a one- or two-character special token => its token name.
+      @@lexeme2name = {
+        '(' => 'LEFT_PAREN',
+        ')' => 'RIGHT_PAREN',
+        '{' => 'LEFT_BRACE',
+        '}' => 'RIGHT_BRACE',
+        ',' => 'COMMA',
+        '+' => 'PLUS',
+        '?' => 'QUESTION_MARK',
+        '*' => 'STAR',
+        '..' => 'ELLIPSIS'
+      }.freeze
+      # Here are all the implemented Rley notation keywords (each one maps to itself)
+      @@keywords = %w[
+        match_closest repeat
+      ].map { |x| [x, x] }.to_h # NOTE(review): unlike @@lexeme2name, this hash is not frozen
+      # Constructor. Initialize a tokenizer for Rley notation input.
+      # @param source [String] Rley notation text to tokenize.
+      def initialize(source = nil)
+        @scanner = StringScanner.new('')
+        start_with(source) if source
+      end
+      # Reset the tokenizer and make the given text, the current input.
+      # @param source [String] Rley notation text to tokenize.
+      def start_with(source)
+        @scanner.string = source # replaces the text being scanned
+        @lineno = 1 # line numbering restarts at 1
+        @line_start = 0 # the first line begins at offset 0
+      end
+      # Scan the source and return an array of tokens.
+      # @return [Array<Rley::Lexical::Token>] | Returns a sequence of tokens
+      def tokens
+        tok_sequence = []
+        until @scanner.eos?
+          token = _next_token
+          tok_sequence << token unless token.nil?
+        end
+        return tok_sequence
+      end
+      private
+      def _next_token # returns the next token, or nil when no token could be built
+        pos_before = scanner.pos
+        skip_intertoken_spaces
+        ws_found = true if scanner.pos > pos_before # stays nil when no whitespace was skipped
+        curr_ch = scanner.peek(1)
+        return nil if curr_ch.nil? || curr_ch.empty?
+        token = nil
+        if '(){},'.include? curr_ch
+          # Single delimiter or separator: always mapped through @@lexeme2name
+          token = build_token(@@lexeme2name[curr_ch], scanner.getch)
+        elsif '?*+,'.include? curr_ch # modifier character (',' never gets here: the branch above takes it)
+          # A modifier preceded by whitespace is a plain SYMBOL, otherwise it is a modifier token
+          symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
+          token = build_token(symb, scanner.getch)
+        elsif (lexeme = scanner.scan(/\.\./))
+          # Two-character token '..' (ELLIPSIS)
+          token = build_token(@@lexeme2name[lexeme], lexeme)
+        elsif scanner.check(/"|'/) # Start of string detected...
+          token = build_string_token
+        elsif (lexeme = scanner.scan(/\d+/))
+          token = build_token('INT_LIT', lexeme)
+        elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
+          keyw = @@keywords[lexeme.chop!] # chop! strips the trailing ':' from the lexeme in place
+          token = build_token('KEY', lexeme) if keyw
+          # NOTE(review): an unknown 'name:' key leaves token nil, so it is dropped without any error
+        elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
+           token = build_token('SYMBOL', lexeme)
+        else # Unknown token
+          col = scanner.pos - @line_start + 1
+          _erroneous = curr_ch.nil? ? '' : scanner.scan(/./) # consumes the offending character
+          raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
+        end
+        return token
+      end
+      def build_token(aSymbolName, aLexeme)
+        begin
+          lex_length = aLexeme ? aLexeme.size : 0
+          col = scanner.pos - lex_length - @line_start + 1
+          pos = Rley::Lexical::Position.new(@lineno, col)
+          token = Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
+        rescue StandardError => e
+          puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
+          raise e
+        end
+        return token
+      end
+      # precondition: current position at leading quote
+      def build_string_token
+        # Build a 'STR_LIT' token from the quoted text at the scan position.
+        # The opening quote decides which quote character closes the literal.
+        # @return [Rley::Lexical::Token] token whose lexeme is the text between the quotes
+        # @raise [ScanError] when the input ends before the closing quote
+        delimiter = scanner.scan(/./)
+        scan_pos = scanner.pos
+        line = @lineno
+        column_start = scan_pos - @line_start
+        literal = +''
+        loop do
+          substr = scanner.scan(/[^"'\\\r\n]*/)
+          if scanner.eos?
+            pos_start = "line #{line}:#{column_start}"
+            raise ScanError, "Error: [#{pos_start}]: Unterminated string."
+          end
+          literal << substr
+          special = scanner.scan(/["'\\\r\n]/)
+          case special
+          when delimiter # Terminating quote found
+            break
+          when "\r"
+            # Consume the LF of a CRLF pair explicitly ('.' never matches "\n"),
+            # and do so before next_line so that the new line starts after it.
+            special << "\n" if scanner.skip(/\n/)
+            next_line
+          when "\n"
+            next_line
+          end
+          # Line breaks, quotes of the other kind and backslashes are kept verbatim
+          literal << special
+        end
+        pos = Rley::Lexical::Position.new(line, column_start)
+        Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
+      end
+      # Skip non-significant whitespaces and comments.
+      # Advance the scanner until something significant is found.
+      def skip_intertoken_spaces
+        loop do
+          ws_found = scanner.skip(/[ \t\f]+/) ? true : false
+          nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
+          if nl_found
+            ws_found = true
+            next_line
+          end
+          break unless ws_found
+        end
+        scanner.pos
+      end
+      def next_line
+        @lineno += 1 # one more line terminator has been passed
+        @line_start = scanner.pos # offset used as origin when computing token columns
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/parse_rep/ast_base_builder.rb CHANGED Viewed

@@ -123,6 +123,50 @@ module Rley # This module is used as a namespace
         end
         return node
       end
+      # Standard method for handling one or more modifier: symbol+
+      # rule('symbol_plus' => 'symbol_plus symbol')
+      # def reduce_base_plus_more(_production, _range, _tokens, theChildren)
+        # theChildren[0] << theChildren[1]
+      # end
+      # Standard rule method handling one or more modifier: symbol+
+      # rule('symbol_plus' => 'symbol')
+      # def reduce_base_plus_last(_production, _range, _tokens, theChildren)
+        # [theChildren[0]]
+      # end
+      # Implicit rule generated for * modifier
+      # rule('X') => 'X item'.as '_star_more'
+      def reduce__star_more(_production, _range, _tokens, theChildren)
+        theChildren[0] << theChildren[1] # append the new item to the first child (in place)
+        theChildren[0] # the first child, now extended, is the result
+      end
+      # Implicit rule generated for * modifier
+      # rule('X') => ''.as '_star_none'
+      def reduce__star_none(_production, _range, _tokens, _children)
+        [] # empty right-hand side: the result is a fresh empty array
+      end
+      # Implicit rule generated for + modifier
+      # rule('X') => 'X item'.as '_plus_more'
+      def reduce__plus_more(_production, _range, _tokens, theChildren)
+        theChildren[0] << theChildren[1] # append the new item to the first child (in place)
+        theChildren[0] # the first child, now extended, is the result
+      end
+      # Implicit rule generated for + modifier
+      # rule('X') => 'item'.as '_plus_one'
+      def reduce__plus_one(_production, _range, _tokens, theChildren)
+        [theChildren[0]] # wrap the single item in a one-element array
+      end
+      # Generic reduce method, not tied to a particular modifier:
+      # it returns the array of children as is.
+      def reduce_return_children(_production, _range, _tokens, theChildren)
+        theChildren # passed through unchanged (same array object)
+      end
     end # class
   end # module
 end # module

data/lib/rley/parser/gfg_chart.rb CHANGED Viewed

@@ -12,11 +12,15 @@ module Rley # This module is used as a namespace
     # the chart is an array with n + 1 entry sets.
     class GFGChart
       # @return [Array<ParseEntrySet>] entry sets (one per input token + 1)
-      attr_reader(:sets)
+      attr_reader :sets
+      # @return [Array<Array<Syntax::MatchClosest>>]
+      attr_reader :constraints
       # @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
       def initialize(aGFGraph)
         @sets = [ParseEntrySet.new]
+        @constraints = [[]] # one (initially empty) constraint list for the first entry set
         push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
       end
@@ -42,6 +46,18 @@ module Rley # This module is used as a namespace
         end
       end
+      # if an entry corresponds to dotted item with a constraint
+      # make this constraint active for this index
+      # :before 'IF'
+      # search backwards to find nearest 'IF' scan rule
+      # in n+1, retrieve all items with IF . pattern
+      # create a lambda
+      # for every subsequent push_entry with same index,
+      # the lambda checks the condition (i.e pattern: ELSE . )
+      # if the condition is false, then push new entry
+      # if the condition is true but the consequent is false, then discard push action
+      # consequent: candidate refers to same dotted_item and same origin, then condition is false
       # Push a parse entry for the chart entry with given index
       # @param anIndex [Integer] The rank of the token in the input stream.
       # @return [ParseEntry] the passed parse entry if it is pushed
@@ -51,14 +67,48 @@ module Rley # This module is used as a namespace
         # puts "  anOrigin: #{anOrigin}"
         # puts "  anIndex: #{anIndex}"
         # puts "  _reason: #{_reason}"
-        new_entry = ParseEntry.new(aVertex, anOrigin)
         if anIndex == sets.size
-          err_msg = "Internal error: unexpected push reason #{reason}"
-          raise StandardError, err_msg if reason != :scan_rule
+          if reason == :scan_rule
+            add_entry_set
+          else
+            err_msg = "Internal error: unexpected push reason #{reason}"
+            raise StandardError, err_msg
+          end
+        end
+        reject = false
+        unless constraints[anIndex].empty?
+          constraints[anIndex].each do |ct|
+            case ct
+              when Syntax::MatchClosest
+                not_found = sets[anIndex][0].prev_symbol != aVertex.prev_symbol
+                next if not_found
+                some_mismatch = ct.entries.find do |en|
+                  (en.vertex.dotted_item.production == aVertex.dotted_item.production) &&
+                    (en.origin != anOrigin)
+                end
+                reject = true if some_mismatch
+            end
+          end
+        end
+        return nil if reject
+        new_entry = ParseEntry.new(aVertex, anOrigin)
+        result = self[anIndex].push_entry(new_entry)
-          add_entry_set
+        if aVertex.kind_of?(GFG::ItemVertex) && aVertex.dotted_item.constraint
+          ct = aVertex.dotted_item.constraint
+          case ct
+            when Syntax::MatchClosest
+              update_match_closest(ct, anIndex)
+          end
+          constraints[anIndex] << ct
         end
-        self[anIndex].push_entry(new_entry)
+        result
       end
       # Retrieve the first parse entry added to this chart
@@ -113,6 +163,25 @@ module Rley # This module is used as a namespace
       end
       # rubocop: enable Lint/UselessAssignment
+      # Retrieve all entries that have a given terminal before the dot.
+      # @param criteria [Hash{Symbol => String}]
+      def search_entries(atIndex, criteria)
+        entries = sets[atIndex].entries
+        keyword = criteria.keys[0]
+        found = []
+        entries.each do |e|
+          case keyword
+            when :before # terminal before dot
+              term_name = criteria[keyword]
+              if e.dotted_entry? && e.vertex.dotted_item.position > -2
+                found << e if e.prev_symbol&.name == term_name
+              end
+          end
+        end
+        found
+      end
       # @return [String] A human-readable representation of the chart.
       def to_s
         result = +''
@@ -130,6 +199,32 @@ module Rley # This module is used as a namespace
       def add_entry_set
          @sets << ParseEntrySet.new
+         @constraints << [] # keeps one constraint list per entry set
+      end
+      def update_match_closest(aConstraint, anIndex)
+        # Locate in the chart the closest matching terminal...
+        i = anIndex - 1
+        loop do
+          first_entry = sets[i][0]
+          prev_symbol = first_entry.prev_symbol
+          break if prev_symbol.name == aConstraint.closest_symb
+          i -= 1
+          break if i.negative?
+        end
+        # Retrieve all entries of the kind: closest_symb .
+        if i.positive?
+          entries = sets[i].entries.select do |en|
+            if en.prev_symbol
+              en.prev_symbol.name == aConstraint.closest_symb
+            else
+              false
+            end
+          end
+          aConstraint.entries = entries
+        end
       end
     end # class
   end # module