rley 0.7.08 → 0.8.00

Files changed (71)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +4 -5
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  5. data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
  6. data/examples/NLP/pico_en_demo.rb +2 -2
  7. data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
  8. data/examples/data_formats/JSON/json_demo.rb +1 -2
  9. data/examples/data_formats/JSON/json_grammar.rb +11 -11
  10. data/examples/general/calc_iter1/calc_grammar.rb +5 -4
  11. data/examples/general/calc_iter2/calc_grammar.rb +9 -9
  12. data/examples/general/left.rb +1 -1
  13. data/examples/general/right.rb +1 -1
  14. data/lib/rley.rb +1 -1
  15. data/lib/rley/base/dotted_item.rb +5 -0
  16. data/lib/rley/base/grm_items_builder.rb +6 -0
  17. data/lib/rley/constants.rb +1 -1
  18. data/lib/rley/engine.rb +2 -2
  19. data/lib/rley/interface.rb +16 -0
  20. data/lib/rley/notation/all_notation_nodes.rb +2 -0
  21. data/lib/rley/notation/ast_builder.rb +191 -0
  22. data/lib/rley/notation/ast_node.rb +44 -0
  23. data/lib/rley/notation/ast_visitor.rb +113 -0
  24. data/lib/rley/notation/grammar.rb +49 -0
  25. data/lib/rley/notation/grammar_builder.rb +451 -0
  26. data/lib/rley/notation/grouping_node.rb +23 -0
  27. data/lib/rley/notation/parser.rb +56 -0
  28. data/lib/rley/notation/sequence_node.rb +35 -0
  29. data/lib/rley/notation/symbol_node.rb +29 -0
  30. data/lib/rley/notation/tokenizer.rb +192 -0
  31. data/lib/rley/parse_rep/ast_base_builder.rb +13 -0
  32. data/lib/rley/parser/gfg_chart.rb +100 -6
  33. data/lib/rley/parser/gfg_parsing.rb +5 -3
  34. data/lib/rley/parser/parse_entry_set.rb +1 -1
  35. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +45 -15
  36. data/lib/rley/syntax/grm_symbol.rb +1 -1
  37. data/lib/rley/syntax/match_closest.rb +43 -0
  38. data/lib/rley/syntax/production.rb +6 -0
  39. data/spec/rley/engine_spec.rb +6 -6
  40. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  41. data/spec/rley/notation/grammar_builder_spec.rb +295 -0
  42. data/spec/rley/notation/parser_spec.rb +184 -0
  43. data/spec/rley/notation/tokenizer_spec.rb +370 -0
  44. data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
  45. data/spec/rley/parse_rep/groucho_spec.rb +1 -1
  46. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
  47. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
  48. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
  49. data/spec/rley/parser/dangling_else_spec.rb +445 -0
  50. data/spec/rley/parser/gfg_earley_parser_spec.rb +95 -9
  51. data/spec/rley/parser/gfg_parsing_spec.rb +1 -1
  52. data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
  53. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  54. data/spec/rley/support/grammar_abc_helper.rb +2 -2
  55. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  56. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  57. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  58. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  59. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  60. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  61. data/spec/rley/support/grammar_sppf_helper.rb +2 -2
  62. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +30 -11
  63. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  64. data/spec/rley/syntax/production_spec.rb +4 -0
  65. metadata +29 -14
  66. data/lib/rley/parser/parse_state.rb +0 -78
  67. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  68. data/lib/rley/parser/state_set.rb +0 -100
  69. data/spec/rley/parser/parse_state_spec.rb +0 -125
  70. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  71. data/spec/rley/parser/state_set_spec.rb +0 -130
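The centerpiece of this release is the new `Rley::Notation` layer (items 19-30 above): a small grammar-definition language with `?`, `*` and `+` modifiers, its own tokenizer, parser and AST builder, plus the new `match_closest` constraint used by `dangling_else_spec.rb`. A minimal sketch of how a grammar could be declared through the `Rley.grammar_builder` facade added in `lib/rley/interface.rb`; the terminal names and rules below are illustrative assumptions (they loosely mirror the new `grammar_int_seq_helper.rb` spec helper), not excerpts from the gem.

```ruby
require 'rley' # assumes gem 'rley', '~> 0.8'

# Hedged sketch of the new notation DSL; terminal names and rules are invented here.
# Rley.grammar_builder is the facade method added by lib/rley/interface.rb.
builder = Rley.grammar_builder do
  add_terminals('INTEGER', 'COMMA')

  rule 'start'   => 'int_seq?'                 # '?' : the sequence is optional
  rule 'int_seq' => 'INTEGER (COMMA INTEGER)*' # '*' : zero or more repetitions of the group
end

grammar = builder.grammar
```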
data/lib/rley/notation/grouping_node.rb
@@ -0,0 +1,23 @@
+ # frozen_string_literal: true
+
+ require_relative 'sequence_node'
+
+ module Rley
+   module Notation
+     # A syntax node representing an expression bracketed by parentheses.
+     class GroupingNode < SequenceNode
+       # @param aPosition [Rley::Lexical::Position] Start position.
+       # @param sequence [Array<ASTNode>] sequence of AST nodes
+       # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
+       def initialize(aPosition, sequence, theRepetition = nil)
+         super(aPosition, sequence, theRepetition)
+       end
+
+       # Part of the 'visitee' role in Visitor design pattern.
+       # @param visitor [Notation::ASTVisitor] the visitor
+       def accept(visitor)
+         visitor.visit_grouping_node(self)
+       end
+     end # class
+   end # module
+ end # module
data/lib/rley/notation/parser.rb
@@ -0,0 +1,56 @@
+ # frozen_string_literal: true
+
+ require_relative 'tokenizer'
+ require_relative 'grammar'
+ require_relative 'ast_builder'
+
+ module Rley
+   module Notation
+     # A parser for the Rley grammar notation that produces concrete parse trees.
+     # Concrete parse trees are the default kind of parse tree
+     # generated by the Rley library.
+     # They consist of two node types only:
+     # - NonTerminalNode
+     # - TerminalNode
+     # A NonTerminalNode has zero or more child nodes (called subnodes).
+     # A TerminalNode is a leaf node, that is, it has no child node.
+     # While concrete parse tree nodes can be generated out of the box,
+     # they have the following drawbacks:
+     # - Generic node classes that aren't always suited to the needs of
+     #   the language being processed.
+     # - Concrete parse trees tend to be deeply nested, which may complicate
+     #   further processing.
+     class Parser
+       # @return [Rley::Engine] A facade object for the Rley parsing library
+       attr_reader(:engine)
+
+       def initialize
+         # Create a Rley facade object
+         @engine = Rley::Engine.new do |cfg|
+           cfg.diagnose = true
+           cfg.repr_builder = Notation::ASTBuilder
+         end
+
+         # Step 1. Load RGN grammar
+         @engine.use_grammar(Rley::Notation::RGNGrammar)
+       end
+
+       # Parse the given grammar notation expression into a parse tree.
+       # @param source [String] grammar notation text to parse
+       # @return [Rley::ParseTree] A parse tree equivalent to the given input.
+       def parse(source)
+         lexer = Tokenizer.new(source)
+         result = engine.parse(lexer.tokens)
+
+         unless result.success?
+           # Stop if the parse failed...
+           line1 = "Parsing failed\n"
+           line2 = "Reason: #{result.failure_reason.message}"
+           raise SyntaxError, line1 + line2
+         end
+
+         return engine.convert(result) # engine.to_ptree(result)
+       end
+     end # class
+   end # module
+ end # module
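Based only on the two public methods shown above (`#initialize` and `#parse`), a `Notation::Parser` is used roughly as follows; the sample right-hand-side text is an assumption made for illustration, not taken from the gem's specs.

```ruby
require 'rley'

# Hedged usage sketch of Rley::Notation::Parser; the input string is illustrative.
parser = Rley::Notation::Parser.new
ptree  = parser.parse("INTEGER (COMMA INTEGER)*")

# On failure, #parse raises SyntaxError carrying the failure reason
# collected from the engine (see the `unless result.success?` branch above).
```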
data/lib/rley/notation/sequence_node.rb
@@ -0,0 +1,35 @@
+ # frozen_string_literal: true
+
+ require_relative 'ast_node'
+
+ module Rley
+   module Notation
+     # A syntax node for a sequence of AST nodes
+     class SequenceNode < ASTNode
+       # @return [Array<ASTNode>]
+       attr_reader :subnodes
+
+       attr_accessor :constraints
+
+       # @param aPosition [Rley::Lexical::Position] Start position.
+       # @param sequence [Array<ASTNode>] sequence of AST nodes
+       # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
+       def initialize(aPosition, sequence, theRepetition = nil)
+         super(aPosition)
+         @subnodes = sequence
+         self.repetition = theRepetition if theRepetition
+         @constraints = []
+       end
+
+       def size
+         subnodes.size
+       end
+
+       # Part of the 'visitee' role in Visitor design pattern.
+       # @param visitor [Notation::ASTVisitor] the visitor
+       def accept(visitor)
+         visitor.visit_sequence_node(self)
+       end
+     end # class
+   end # module
+ end # module
data/lib/rley/notation/symbol_node.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ require_relative 'ast_node'
+
+ module Rley
+   module Notation
+     # A syntax node for a grammar symbol occurring in the rhs of a rule
+     class SymbolNode < ASTNode
+       # @return [String] name of grammar symbol
+       attr_reader :name
+
+       # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
+       # @param aName [String] name of grammar symbol
+       # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
+       def initialize(aPosition, aName, theRepetition = nil)
+         super(aPosition)
+         @name = aName
+         self.repetition = theRepetition if theRepetition
+       end
+
+       # Part of the 'visitee' role in Visitor design pattern.
+       # Concrete implementation of the abstract method inherited from ASTNode.
+       # @param visitor [Notation::ASTVisitor] the visitor
+       def accept(visitor)
+         visitor.visit_symbol_node(self)
+       end
+     end # class
+   end # module
+ end # module
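`GroupingNode`, `SequenceNode` and `SymbolNode` all expose the same `accept` double dispatch, so a visitor only needs one `visit_*` method per node class. A minimal sketch of the visitor side of that contract; `RhsLister` is hypothetical (it is not part of the gem and does not subclass the gem's `Notation::ASTVisitor`), and it relies only on the `accept`, `name` and `subnodes` members shown above.

```ruby
# Hedged sketch of the double dispatch used by the notation AST nodes.
# RhsLister is a hypothetical visitor that collects the symbol names of a rhs.
class RhsLister
  attr_reader :names

  def initialize
    @names = []
  end

  def visit_symbol_node(node)
    @names << node.name
  end

  def visit_sequence_node(node)
    node.subnodes.each { |child| child.accept(self) }
  end

  # GroupingNode shares SequenceNode's structure; only its visit hook differs.
  def visit_grouping_node(node)
    visit_sequence_node(node)
  end
end
```

Usage would be `ast.accept(RhsLister.new)` on a notation AST root; the node decides which `visit_*` method runs, so adding a new analysis never requires touching the node classes.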
data/lib/rley/notation/tokenizer.rb
@@ -0,0 +1,192 @@
+ # frozen_string_literal: true
+
+ require 'strscan'
+ require_relative '../lexical/token'
+
+ module Rley
+   module Notation
+     # A tokenizer for the Rley notation language.
+     # Responsibility: break input into a sequence of token objects.
+     # The tokenizer should recognize:
+     # Identifiers,
+     # Number literals including single digit
+     # String literals (quote delimited)
+     # Delimiters: e.g. parentheses '(', ')'
+     # Separators: e.g. comma
+     class Tokenizer
+       # @return [StringScanner] Low-level input scanner
+       attr_reader(:scanner)
+
+       # @return [Integer] The current line number
+       attr_reader(:lineno)
+
+       # @return [Integer] Position of last start of line in the input
+       attr_reader(:line_start)
+
+       # One or two special character tokens.
+       @@lexeme2name = {
+         '(' => 'LEFT_PAREN',
+         ')' => 'RIGHT_PAREN',
+         '{' => 'LEFT_BRACE',
+         '}' => 'RIGHT_BRACE',
+         ',' => 'COMMA',
+         '+' => 'PLUS',
+         '?' => 'QUESTION_MARK',
+         '*' => 'STAR',
+         '..' => 'ELLIPSIS'
+       }.freeze
+
+       # Here are all the implemented Rley notation keywords
+       @@keywords = %w[
+         match_closest repeat
+       ].map { |x| [x, x] }.to_h
+
+       # Constructor. Initialize a tokenizer for Rley notation text.
+       # @param source [String] Rley notation text to tokenize.
+       def initialize(source = nil)
+         @scanner = StringScanner.new('')
+         start_with(source) if source
+       end
+
+       # Reset the tokenizer and make the given text the current input.
+       # @param source [String] Rley notation text to tokenize.
+       def start_with(source)
+         @scanner.string = source
+         @lineno = 1
+         @line_start = 0
+       end
+
+       # Scan the source and return an array of tokens.
+       # @return [Array<Rley::Lexical::Token>] Returns a sequence of tokens
+       def tokens
+         tok_sequence = []
+         until @scanner.eos?
+           token = _next_token
+           tok_sequence << token unless token.nil?
+         end
+
+         return tok_sequence
+       end
+
+       private
+
+       def _next_token
+         pos_before = scanner.pos
+         skip_intertoken_spaces
+         ws_found = true if scanner.pos > pos_before
+         curr_ch = scanner.peek(1)
+         return nil if curr_ch.nil? || curr_ch.empty?
+
+         token = nil
+
+         if '(){},'.include? curr_ch
+           # Single delimiter, separator or character
+           token = build_token(@@lexeme2name[curr_ch], scanner.getch)
+         elsif '?*+,'.include? curr_ch # modifier character
+           # modifiers without prefix text are symbols
+           symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
+           token = build_token(symb, scanner.getch)
+         elsif (lexeme = scanner.scan(/\.\./))
+           # One or two special character tokens
+           token = build_token(@@lexeme2name[lexeme], lexeme)
+         elsif scanner.check(/"|'/) # Start of string detected...
+           token = build_string_token
+         elsif (lexeme = scanner.scan(/\d+/))
+           token = build_token('INT_LIT', lexeme)
+         elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
+           keyw = @@keywords[lexeme.chop!]
+           token = build_token('KEY', lexeme) if keyw
+           # ... error case
+         elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
+           token = build_token('SYMBOL', lexeme)
+         else # Unknown token
+           col = scanner.pos - @line_start + 1
+           _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
+           raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
+         end
+
+         return token
+       end
+
+       def build_token(aSymbolName, aLexeme)
+         begin
+           lex_length = aLexeme ? aLexeme.size : 0
+           col = scanner.pos - lex_length - @line_start + 1
+           pos = Rley::Lexical::Position.new(@lineno, col)
+           token = Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
+
+         rescue StandardError => e
+           puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
+           raise e
+         end
+
+         return token
+       end
+
+       # precondition: current position at leading quote
+       def build_string_token
+         delimiter = scanner.scan(/./)
+         scan_pos = scanner.pos
+         line = @lineno
+         column_start = scan_pos - @line_start
+         literal = +''
+         loop do
+           substr = scanner.scan(/[^"'\\\r\n]*/)
+           if scanner.eos?
+             pos_start = "line #{line}:#{column_start}"
+             raise ScanError, "Error: [#{pos_start}]: Unterminated string."
+           else
+             literal << substr
+             special = scanner.scan(/["'\\\r\n]/)
+             case special
+             when delimiter # Terminating quote found
+               break
+             when "\r"
+               next_line
+               special << scanner.scan(/./) if scanner.match?(/\n/)
+               literal << special
+             when "\n"
+               next_line
+               literal << special
+             # when '\\'
+             #   ch = scanner.scan(/./)
+             #   next unless ch
+
+             #   escaped = @@escape_chars[ch]
+             #   if escaped
+             #     literal << escaped
+             #   else
+             #     literal << ch
+             #   end
+             end
+           end
+         end
+         pos = Rley::Lexical::Position.new(line, column_start)
+         lexeme = scanner.string[scan_pos - 1..scanner.pos - 1]
+         Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
+       end
+
+       # Skip non-significant whitespaces and comments.
+       # Advance the scanner until something significant is found.
+       def skip_intertoken_spaces
+         loop do
+           ws_found = scanner.skip(/[ \t\f]+/) ? true : false
+           nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
+           if nl_found
+             ws_found = true
+             next_line
+           end
+
+           break unless ws_found
+         end
+
+         scanner.pos
+       end
+
+       def next_line
+         @lineno += 1
+         @line_start = scanner.pos
+       end
+     end # class
+   end # module
+ end # module
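Tracing the branches of `_next_token` above, a short input yields the token kinds listed below. This is a hedged walk-through of the code as shown, not output copied from the gem's specs; `Rley::Lexical::Token#terminal` is assumed to return the terminal name passed to the constructor.

```ruby
require 'rley'

# Hedged walk-through of the tokenizer branches shown above.
tokenizer = Rley::Notation::Tokenizer.new("expression ( COMMA expression )*")
tokenizer.tokens.map(&:terminal)
# Expected, by reading _next_token:
#   "expression" => SYMBOL       (catch-all /[^?*+,:(){}\s]+/ branch)
#   "("          => LEFT_PAREN   (single delimiter branch)
#   "COMMA"      => SYMBOL
#   "expression" => SYMBOL
#   ")"          => RIGHT_PAREN
#   "*"          => STAR         (no whitespace before it, so it stays a modifier)
```

Note the whitespace sensitivity: a `*`, `+` or `?` preceded by whitespace is reported as a plain SYMBOL, so only a modifier glued to the preceding symbol or group acts as a repetition marker.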
data/lib/rley/parse_rep/ast_base_builder.rb
@@ -123,6 +123,19 @@ module Rley # This module is used as a namespace
          end
          return node
        end
+
+       # Standard method for handling one or more modifier: symbol+
+       # rule('symbol_plus' => 'symbol_plus symbol')
+       def reduce_base_plus_more(_production, _range, _tokens, theChildren)
+         theChildren[0] << theChildren[1]
+       end
+
+       # Standard rule method handling one or more modifier: symbol+
+       # rule('symbol_plus' => 'symbol')
+       def reduce_base_plus_last(_production, _range, _tokens, theChildren)
+         [theChildren[0]]
+       end
+
      end # class
    end # module
  end # module
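These two helpers implement the usual left-recursive accumulation for a `+` (one or more) repetition: the `..._last` reduction wraps the first child in an array, and each `..._more` reduction appends the next child to that array. A contrived, hedged illustration of how the reductions compose; `DemoBuilder` is hypothetical, the `nil` arguments stand in for the production, range and tokens that the parser normally supplies, and `allocate` skips full construction because only these two helpers are exercised.

```ruby
require 'rley'

# Hedged demo of the accumulation performed by the two helpers above.
# DemoBuilder exists only for this example; real builders also implement
# the other hooks expected by ASTBaseBuilder.
class DemoBuilder < Rley::ParseRep::ASTBaseBuilder; end

builder = DemoBuilder.allocate

list = builder.send(:reduce_base_plus_last, nil, nil, nil, [:digit1])
# => [:digit1]
list = builder.send(:reduce_base_plus_more, nil, nil, nil, [list, :digit2])
# => [:digit1, :digit2]
list = builder.send(:reduce_base_plus_more, nil, nil, nil, [list, :digit3])
# => [:digit1, :digit2, :digit3]
```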
data/lib/rley/parser/gfg_chart.rb
@@ -12,11 +12,15 @@ module Rley # This module is used as a namespace
      # the chart is an array with n + 1 entry sets.
      class GFGChart
        # @return [Array<ParseEntrySet>] entry sets (one per input token + 1)
-       attr_reader(:sets)
+       attr_reader :sets
+
+       # @return [Array<Array<Syntax::MatchClosest>>]
+       attr_reader :constraints

        # @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
        def initialize(aGFGraph)
          @sets = [ParseEntrySet.new]
+         @constraints = [[]]
          push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
        end

@@ -42,6 +46,18 @@ module Rley # This module is used as a namespace
          end
        end

+       # If an entry corresponds to a dotted item with a constraint,
+       # make this constraint active for this index.
+       #   :before 'IF'
+       #   search backwards to find the nearest 'IF' scan rule;
+       #   in set n+1, retrieve all items with the pattern: IF .
+       #   create a lambda;
+       #   for every subsequent push_entry with the same index,
+       #   the lambda checks the condition (i.e. the pattern: ELSE . );
+       #   if the condition is false, then push the new entry;
+       #   if the condition is true but the consequent is false, then discard the push action
+       #   consequent: candidate refers to the same dotted_item and same origin, then condition is false
+
        # Push a parse entry for the chart entry with given index
        # @param anIndex [Integer] The rank of the token in the input stream.
        # @return [ParseEntry] the passed parse entry if it is pushed
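The design notes above describe the dangling-else disambiguation exercised by the new `spec/rley/parser/dangling_else_spec.rb`: an `ELSE` occurrence can be constrained to attach to the closest preceding `IF`. In the 0.8 notation a terminal in a right-hand side carries that constraint as a `{match_closest: ...}` annotation, which the tokenizer above recognizes via `LEFT_BRACE`, the `match_closest` keyword and a string literal. A hedged sketch of such a grammar; the terminal names and rules are invented for this example, only the annotation form follows the tokenizer shown earlier.

```ruby
require 'rley'

# Hedged sketch: binding ELSE to the closest preceding IF, as the notes above describe.
builder = Rley.grammar_builder do
  add_terminals('IF', 'THEN', 'ELSE', 'EXPR', 'OTHER')

  rule 'stmt' => 'OTHER'
  rule 'stmt' => 'IF EXPR THEN stmt'
  rule 'stmt' => "IF EXPR THEN stmt ELSE{match_closest: 'IF'} stmt"
end
```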
@@ -51,14 +67,48 @@ module Rley # This module is used as a namespace
          # puts " anOrigin: #{anOrigin}"
          # puts " anIndex: #{anIndex}"
          # puts " _reason: #{_reason}"
-         new_entry = ParseEntry.new(aVertex, anOrigin)
          if anIndex == sets.size
-           err_msg = "Internal error: unexpected push reason #{reason}"
-           raise StandardError, err_msg if reason != :scan_rule
+           if reason == :scan_rule
+             add_entry_set
+           else
+             err_msg = "Internal error: unexpected push reason #{reason}"
+             raise StandardError, err_msg
+           end
+         end

-           add_entry_set
+         reject = false
+         unless constraints[anIndex].empty?
+           constraints[anIndex].each do |ct|
+             case ct
+             when Syntax::MatchClosest
+               not_found = sets[anIndex][0].prev_symbol != aVertex.prev_symbol
+               next if not_found
+
+               some_mismatch = ct.entries.find do |en|
+                 (en.vertex.dotted_item.production == aVertex.dotted_item.production) &&
+                   (en.origin != anOrigin)
+               end
+               reject = true if some_mismatch
+             end
+           end
+         end
+
+         return nil if reject
+
+         new_entry = ParseEntry.new(aVertex, anOrigin)
+         result = self[anIndex].push_entry(new_entry)
+
+         if aVertex.kind_of?(GFG::ItemVertex) && aVertex.dotted_item.constraint
+           ct = aVertex.dotted_item.constraint
+
+           case ct
+           when Syntax::MatchClosest
+             update_match_closest(ct, anIndex)
+           end
+           constraints[anIndex] << ct
          end
-         self[anIndex].push_entry(new_entry)
+
+         result
        end

        # Retrieve the first parse entry added to this chart
@@ -113,6 +163,25 @@ module Rley # This module is used as a namespace
        end
        # rubocop: enable Lint/UselessAssignment

+       # Retrieve all entries that have a given terminal before the dot.
+       # @param criteria [Hash{Symbol => String}]
+       def search_entries(atIndex, criteria)
+         entries = sets[atIndex].entries
+         keyword = criteria.keys[0]
+         found = []
+         entries.each do |e|
+           case keyword
+           when :before # terminal before dot
+             term_name = criteria[keyword]
+             if e.dotted_entry? && e.vertex.dotted_item.position > -2
+               found << e if e.prev_symbol&.name == term_name
+             end
+           end
+         end
+
+         found
+       end
+
        # @return [String] A human-readable representation of the chart.
        def to_s
          result = +''
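`search_entries` complements the constraint machinery: given a set index, it collects the entries whose dot sits right after a particular terminal. A hedged usage sketch; the chart variable, the index and the terminal name are placeholders.

```ruby
# Hedged usage sketch of the new GFGChart#search_entries helper.
# `chart` is assumed to be a GFGChart built during a parse; 4 and 'IF' are placeholders.
entries_after_if = chart.search_entries(4, { before: 'IF' })
entries_after_if.each { |entry| puts entry.inspect }
```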
@@ -130,6 +199,31 @@ module Rley # This module is used as a namespace

        def add_entry_set
          @sets << ParseEntrySet.new
+         @constraints << []
+       end
+
+       def update_match_closest(aConstraint, anIndex)
+         # Locate in the chart the closest matching terminal...
+         i = anIndex - 1
+         loop do
+           first_entry = sets[i][0]
+           prev_symbol = first_entry.prev_symbol
+           break if prev_symbol.name == aConstraint.closest_symb
+           i -= 1
+           break if i < 0
+         end
+
+         # Retrieve all entries of the kind: closest_symb .
+         if i > 0
+           entries = sets[i].entries.select do |en|
+             if en.prev_symbol
+               en.prev_symbol.name == aConstraint.closest_symb
+             else
+               false
+             end
+           end
+           aConstraint.entries = entries
+         end
        end
      end # class
    end # module