RubyGems - rley - Versions diffs - 0.8.06 → 0.8.10 - Mend

rley 0.8.06 → 0.8.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

checksums.yaml +4 -4
data/.rubocop.yml +23 -2
data/CHANGELOG.md +21 -1
data/LICENSE.txt +1 -1
data/README.md +1 -1
data/appveyor.yml +1 -3
data/examples/NLP/benchmark_pico_en.rb +6 -6
data/examples/NLP/engtagger.rb +6 -6
data/examples/general/calc_iter1/calc_lexer.rb +1 -1
data/examples/general/calc_iter2/calc_lexer.rb +1 -1
data/examples/general/left.rb +1 -1
data/examples/general/right.rb +1 -1
data/examples/tokenizer/loxxy_raw_scanner.rex.rb +3 -0
data/examples/tokenizer/loxxy_tokenizer.rb +2 -2
data/examples/tokenizer/run_tokenizer.rb +1 -1
data/examples/tokenizer/{tokens.yaml → tokens.yml} +0 -0
data/lib/rley/constants.rb +1 -1
data/lib/rley/engine.rb +2 -2
data/lib/rley/interface.rb +3 -3
data/lib/rley/lexical/token.rb +1 -1
data/lib/rley/ptree/non_terminal_node.rb +1 -1
data/lib/rley/rgn/all_notation_nodes.rb +5 -0
data/lib/rley/{notation → rgn}/ast_builder.rb +19 -12
data/lib/rley/{notation → rgn}/ast_node.rb +13 -12
data/lib/rley/{notation → rgn}/ast_visitor.rb +10 -10
data/lib/rley/rgn/composite_node.rb +28 -0
data/lib/rley/{notation → rgn}/grammar.rb +1 -1
data/lib/rley/{notation → rgn}/grammar_builder.rb +86 -124
data/lib/rley/{notation → rgn}/parser.rb +7 -7
data/lib/rley/rgn/repetition_node.rb +62 -0
data/lib/rley/rgn/sequence_node.rb +30 -0
data/lib/rley/{notation → rgn}/symbol_node.rb +15 -7
data/lib/rley/{notation → rgn}/tokenizer.rb +71 -60
data/lib/rley/syntax/grm_symbol.rb +0 -4
data/lib/rley/syntax/non_terminal.rb +4 -0
data/lib/rley/syntax/terminal.rb +10 -6
data/spec/rley/parser/dangling_else_spec.rb +3 -3
data/spec/rley/parser/gfg_earley_parser_spec.rb +48 -50
data/spec/rley/{notation → rgn}/grammar_builder_spec.rb +58 -54
data/spec/rley/{notation → rgn}/parser_spec.rb +36 -24
data/spec/rley/rgn/repetition_node_spec.rb +56 -0
data/spec/rley/rgn/sequence_node_spec.rb +48 -0
data/spec/rley/rgn/symbol_node_spec.rb +33 -0
data/spec/rley/{notation → rgn}/tokenizer_spec.rb +2 -2
data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
data/spec/rley/support/grammar_int_seq_helper.rb +2 -2
metadata +40 -33
data/lib/rley/notation/all_notation_nodes.rb +0 -4
data/lib/rley/notation/grouping_node.rb +0 -23
data/lib/rley/notation/sequence_node.rb +0 -35

data/lib/rley/{notation → rgn}/grammar_builder.rb RENAMED Viewed

@@ -7,8 +7,9 @@ require_relative 'ast_visitor'
 require_relative '../syntax/match_closest'
 module Rley # This module is used as a namespace
-  module Notation # This module is used as a namespace
-    # Structure used for production rules that are implicitly generated by Rley
+  # Namespace for classes that define RGN (Rley Grammar Notation)
+  module RGN # This module is used as a namespace
+    # Structure used by Rley to generate implicit production rules.
     RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
     # Builder GoF pattern. Builder builds a complex object
@@ -19,7 +20,7 @@ module Rley # This module is used as a namespace
       #   to the matching grammar symbol object.
       attr_reader(:symbols)
-      # @return [Notation::Parser] Parser for the right-side of productions
+      # @return [RGN::Parser] Parser for the right-side of productions
       attr_reader(:parser)
       # @return [Hash{ASTVisitor, Array}]
@@ -32,21 +33,12 @@ module Rley # This module is used as a namespace
       # @return [Hash{String, String}] The synthesized raw productions
       attr_reader(:synthetized)
-      # Creates a new grammar builder.
+      # Creates a new RGN grammar builder.
       # @param aBlock [Proc] code block used to build the grammar.
-      # @example Building a tiny English grammar
-      #   builder = Rley::Notation::GrammarBuilder.new do
-      #     add_terminals('n', 'v', 'adj', 'det')
-      #     rule 'S' => 'NP VP'
-      #     rule 'VP' => 'v NP'
-      #     rule 'NP' => 'det n'
-      #     rule 'NP' => 'adj NP'
-      #   end
-      #   tiny_eng = builder.grammar
       def initialize(&aBlock)
         @symbols = {}
         @productions = []
-        @parser = Notation::Parser.new
+        @parser = RGN::Parser.new
         @visitor2rhs = {}
         @synthetized = {}
@@ -73,7 +65,7 @@ module Rley # This module is used as a namespace
       end
       # Add the given marker symbol to the grammar of the language
-      # @param aMarkerSymbol [String] A mazker symbol
+      # @param aMarkerSymbol [String] A marker symbol
       # @return [void]
       def add_marker(aMarkerSymbol)
         new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
@@ -227,105 +219,64 @@ module Rley # This module is used as a namespace
       # ################################
       def after_symbol_node(aSymbolNode, aVisitor)
         symb_name = aSymbolNode.name
-        case aSymbolNode.repetition
-        when :zero_or_one
-          # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
-          # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
-          name_modified = "#{symb_name}#{suffix_qmark}"
-          unless symbols.include? name_modified
-            add_nonterminal(name_modified)
-            add_raw_rule(name_modified, symb_name, suffix_qmark_one)
-            add_raw_rule(name_modified, '', suffix_qmark_none)
-          end
-          symb_name = name_modified
-        when :zero_or_more
-          # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
-          # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
-          name_modified = "#{symb_name}#{suffix_star}"
-          unless symbols.include? name_modified
-            add_nonterminal(name_modified)
-            add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
-            add_raw_rule(name_modified, [], suffix_star_none)
-          end
-          symb_name = name_modified
-        when :exactly_one
-          # Do nothing
-        when :one_or_more
-          name_modified = "#{symb_name}#{suffix_plus}"
-          unless symbols.include? name_modified
-            add_nonterminal(name_modified)
-            add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
-            add_raw_rule(name_modified, symb_name, suffix_plus_one)
-          end
-          symb_name = name_modified
-        else
-          raise StandardError, 'Unhandled multiplicity'
-        end
         symb = get_grm_symbol(symb_name)
         visitor2rhs[aVisitor] << symb
       end
       def after_sequence_node(aSequenceNode, _visitor)
-        aSequenceNode.subnodes.each_with_index do |sn, i|
-          next if sn.annotation.empty?
-          matching = sn.annotation['match_closest']
-          aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
-        end
+        add_constraints(aSequenceNode)
       end
-      def after_grouping_node(aGroupingNode, aVisitor)
-        after_sequence_node(aGroupingNode, aVisitor)
-        symb_name = sequence_name(aGroupingNode)
+      def after_repetition_node(aRepNode, aVisitor)
+        add_constraints(aRepNode)
+        return if aRepNode.repetition == :exactly_one
-        unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
-          add_nonterminal(symb_name)
-          rhs = serialize_sequence(aGroupingNode)
-          add_raw_rule(symb_name, rhs, 'return_children', true, aGroupingNode.constraints)
+        node_name = aRepNode.name
+        child_name = aRepNode.subnodes[0].name
+        if aRepNode.child.is_a?(SequenceNode) &&
+           !symbols.include?(child_name) && aRepNode.repetition != :zero_or_one
+          add_nonterminal(child_name)
+          rhs = aRepNode.child.to_text
+          add_raw_rule(child_name, rhs, 'return_children', true)
         end
-        name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
-        case aGroupingNode.repetition
+        case aRepNode.repetition
         when :zero_or_one
-          # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
-          # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
-          unless symbols.include? name_modified
-            add_nonterminal(name_modified)
-            add_raw_rule(name_modified, symb_name, suffix_qmark_one, true)
-            add_raw_rule(name_modified, [], suffix_qmark_none, true)
+          # implicitly called: rule('node_name' => 'child_text').tag 'return_children'
+          # implicitly called: rule('node_name_qmark' => '').tag suffix_qmark_none
+          unless symbols.include? node_name
+            add_nonterminal(node_name)
+            if aRepNode.child.is_a?(SequenceNode) && !aRepNode.child.constraints.empty?
+              aRepNode.constraints.merge(aRepNode.child.constraints)
+            end
+            rhs = aRepNode.child.to_text
+            add_raw_rule(node_name, rhs, 'return_children', false, aRepNode.constraints)
+            add_raw_rule(node_name, [], suffix_qmark_none, true)
           end
         when :zero_or_more
-          # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
-          # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
-          unless symbols.include? name_modified
-            add_nonterminal(name_modified)
-            add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
-            add_raw_rule(name_modified, '', suffix_star_none)
+          # implicitly called: rule('node_name' => 'node_name child_name').tag suffix_star_more
+          # implicitly called: rule('node_name_star' => '').tag suffix_star_none
+          unless symbols.include? node_name
+            add_nonterminal(node_name)
+            rhs = "#{node_name} #{child_name}"
+            add_raw_rule(node_name, rhs, suffix_star_more)
+            add_raw_rule(node_name, '', suffix_star_none)
           end
-        when :exactly_one
-          # Do nothing
         when :one_or_more
-          unless symbols.include? name_modified
-            add_nonterminal(name_modified)
-            add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
-            add_raw_rule(name_modified, symb_name, suffix_plus_one)
+          unless symbols.include? node_name
+            add_nonterminal(node_name)
+            add_raw_rule(node_name, "#{node_name} #{child_name}", suffix_plus_more)
+            add_raw_rule(node_name, child_name, suffix_plus_one)
           end
         else
           raise StandardError, 'Unhandled multiplicity'
         end
-        unless aGroupingNode.repetition == :exactly_one
-          symb = get_grm_symbol(name_modified)
-          visitor2rhs[aVisitor] << symb
-        end
+        symb = get_grm_symbol(node_name)
+        visitor2rhs[aVisitor] << symb
       end
       # A notification to the builder object that the programmer
@@ -425,22 +376,33 @@ module Rley # This module is used as a namespace
         symbols[name]
       end
-      def sequence_name(aSequenceNode)
-        subnode_names = +''
-        aSequenceNode.subnodes.each do |subn|
-          case subn
-          when SymbolNode
-            subnode_names << "_#{subn.name}"
-          when SequenceNode
-            subnode_names << "_#{sequence_name(subn)}"
-          end
-          suffix = repetition2suffix(subn.repetition)
-          subnode_names << suffix
-        end
+      def add_constraints(aCompositeNode)
+        aCompositeNode.subnodes.each_with_index do |sn, i|
+          next if sn.annotation.empty?
-        "seq#{subnode_names}"
+          matching = sn.annotation['match_closest']
+          constraint = Syntax::MatchClosest.new(aCompositeNode, i, matching)
+          aCompositeNode.constraints << constraint
+        end
       end
+      # def sequence_name(aSequenceNode)
+      #   subnode_names = +''
+      #   aSequenceNode.subnodes.each do |subn|
+      #     case subn
+      #     when SymbolNode
+      #       subnode_names << "_#{subn.name}"
+      #     when SequenceNode
+      #       subnode_names << "_#{sequence_name(subn)}"
+      #     when RepetitionNode
+      #       suffix = repetition2suffix(subn.repetition)
+      #       subnode_names << suffix
+      #     end
+      #   end
+      #
+      #   "seq#{subnode_names}"
+      # end
       def node_base_name(aNode)
         if aNode.kind_of?(SymbolNode)
           aNode.name
@@ -456,23 +418,23 @@ module Rley # This module is used as a namespace
         "#{base_name}#{suffix}"
       end
-      def serialize_sequence(aSequenceNode)
-        text = +''
-        aSequenceNode.subnodes.each do |sn|
-          text << ' '
-          case sn
-          when SymbolNode
-            text << sn.name
-          when SequenceNode
-            text << sequence_name(sn)
-          end
-          suffix = repetition2suffix(sn.repetition)
-          text << suffix
-        end
-        text.strip
-      end
+      # def serialize_sequence(aSequenceNode)
+      #   text = +''
+      #   aSequenceNode.subnodes.each do |sn|
+      #     text << ' '
+      #     case sn
+      #     when SymbolNode
+      #       text << sn.name
+      #     when SequenceNode
+      #       text << sequence_name(sn)
+      #     when RepetitionNode
+      #       suffix = repetition2suffix(sn.repetition)
+      #       text << suffix
+      #     end
+      #   end
+      #
+      #   text.strip
+      # end
       def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
         raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
@@ -484,7 +446,7 @@ module Rley # This module is used as a namespace
       end
       def process_raw_rules
-        until synthetized.empty? do
+        until synthetized.empty?
           raw_rules = synthetized.delete(synthetized.keys.first)
           raw_rules.each do |raw|
             new_prod = nil
@@ -494,7 +456,7 @@ module Rley # This module is used as a namespace
               new_prod = rule(raw.lhs => raw.rhs)
             end
             new_prod.tag(raw.tag)
-            new_prod.constraints = raw.constraints
+            new_prod.constraints.concat(raw.constraints)
           end
         end
       end

data/lib/rley/{notation → rgn}/parser.rb RENAMED Viewed

@@ -5,8 +5,8 @@ require_relative 'grammar'
 require_relative 'ast_builder'
 module Rley
-  module Notation
-    # A Lox parser that produce concrete parse trees.
+  module RGN
+    # An RGN (Rley Grammar Notation) parser that produces concrete parse trees.
     # Concrete parse trees are the default kind of parse tree
     # generated by the Rley library.
     # They consist of two node types only:
@@ -28,16 +28,16 @@ module Rley
         # Create a Rley facade object
         @engine = Rley::Engine.new do |cfg|
           cfg.diagnose = true
-          cfg.repr_builder = Notation::ASTBuilder
+          cfg.repr_builder = RGN::ASTBuilder
         end
         # Step 1. Load RGN grammar
-        @engine.use_grammar(Rley::Notation::RGNGrammar)
+        @engine.use_grammar(Rley::RGN::RGNGrammar)
       end
-      # Parse the given Lox program into a parse tree.
-      # @param source [String] Lox program to parse
-      # @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
+      # Parse the given RGN snippet into a parse tree.
+      # @param source [String] Snippet to parse
+      # @return [Rley::ParseTree] A parse tree equivalent to the RGN input.
       def parse(source)
         lexer = Tokenizer.new(source)
         result = engine.parse(lexer.tokens)

data/lib/rley/rgn/repetition_node.rb ADDED Viewed

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require_relative 'composite_node'
+module Rley
+  module RGN
+    # A RGN syntax node representing an expression quantified by a ?, * or +.
+    class RepetitionNode < CompositeNode
+      # @return [Symbol] one of: :zero_or_one, :zero_or_more, :exactly_one, :one_or_more
+      attr_accessor :repetition
+      Repetition2suffix = {
+        zero_or_one: '_qmark',
+        zero_or_more: '_star',
+        exactly_one: '',
+        one_or_more: '_plus'
+      }.freeze
+      # @param child [ASTNode] the AST node subject to the repetition
+      # @param theRepetition [Symbol] how many times the child node can be repeated
+      def initialize(child, theRepetition)
+        super([child])
+        @repetition = theRepetition
+      end
+      # @return [RGN::ASTNode]
+      def child
+        subnodes[0]
+      end
+      # @return [String]
+      def name
+        child_name = subnodes[0].name
+        "rep_#{child_name}#{Repetition2suffix[repetition]}"
+      end
+      # @return [String]
+      def to_text
+        child_text = subnodes[0].to_text
+        "rep_#{child_text}#{Repetition2suffix[repetition]}"
+      end
+      # Part of the 'visitee' role in Visitor design pattern.
+      # @param visitor [RGN::ASTVisitor] the visitor
+      def accept(visitor)
+        visitor.visit_repetition_node(self)
+      end
+      def suffix_qmark
+        Repetition2suffix[:zero_or_one]
+      end
+      def suffix_star
+        Repetition2suffix[:zero_or_more]
+      end
+      def suffix_plus
+        Repetition2suffix[:one_or_more]
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/rgn/sequence_node.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+require_relative 'composite_node'
+module Rley
+  module RGN
+    # A syntax node for a sequence of AST nodes
+    class SequenceNode < CompositeNode
+      def name
+        result = +''
+        subnodes.each do |sn|
+          result << "_#{sn.name}"
+        end
+        "seq#{result}"
+      end
+      def to_text
+        arr = subnodes.map(&:to_text)
+        arr.join(' ')
+      end
+      # Part of the 'visitee' role in Visitor design pattern.
+      # @param visitor [RGN::ASTVisitor] the visitor
+      def accept(visitor)
+        visitor.visit_sequence_node(self)
+      end
+    end # class
+  end # module
+end # module

data/lib/rley/{notation → rgn}/symbol_node.rb RENAMED Viewed

@@ -3,24 +3,32 @@
 require_relative 'ast_node'
 module Rley
-  module Notation
-    # A syntax node for a grammar symbol occurring in rhs of a rule
+  module RGN
+    # A syntax node for a grammar symbol occurring in rhs of a rule.
+    # Symbol nodes are leaf nodes of RGN parse trees.
     class SymbolNode < ASTNode
+      # @return [Rley::Lexical::Position] Position of the entry in the input stream.
+      attr_reader :position
       # @return [String] name of grammar symbol
       attr_reader :name
       # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
       # @param aName [String] name of grammar symbol
-      # @param theRepetition [Symbol] indicates how many times the symbol can be repeated
-      def initialize(aPosition, aName, theRepetition = nil)
-        super(aPosition)
+      def initialize(aPosition, aName)
+        super()
+        @position = aPosition
         @name = aName
-        self.repetition = theRepetition if theRepetition
+      end
+      # @return [String] name of grammar symbol, followed by its annotation text if any
+      def to_text
+        annotation.empty? ? name : "#{name} #{annotation_to_text}"
       end
       # Abstract method (must be overriden in subclasses).
       # Part of the 'visitee' role in Visitor design pattern.
-      # @param _visitor [LoxxyTreeVisitor] the visitor
+      # @param visitor [RGN::ASTVisitor] the visitor
       def accept(visitor)
         visitor.visit_symbol_node(self)
       end

data/lib/rley/{notation → rgn}/tokenizer.rb RENAMED Viewed

@@ -4,7 +4,7 @@ require 'strscan'
 require_relative '../lexical/token'
 module Rley
-  module Notation
+  module RGN
     # A tokenizer for the Rley notation language.
     # Responsibility: break input into a sequence of token objects.
     # The tokenizer should recognize:
@@ -14,6 +14,13 @@ module Rley
     # Delimiters: e.g. parentheses '(',  ')'
     # Separators: e.g. comma
     class Tokenizer
+      PATT_KEY = /[a-zA-Z_][a-zA-Z_0-9]*:/.freeze
+      PATT_INTEGER = /\d+/.freeze
+      PATT_NEWLINE = /(?:\r\n)|\r|\n/.freeze
+      PATT_STRING_START = /"|'/.freeze
+      PATT_SYMBOL = /[^?*+,:(){}\s]+/.freeze
+      PATT_WHITESPACE = /[ \t\f]+/.freeze
       # @return [StringScanner] Low-level input scanner
       attr_reader(:scanner)
@@ -24,7 +31,7 @@ module Rley
       attr_reader(:line_start)
       # One or two special character tokens.
-      @@lexeme2name = {
+      Lexeme2name = {
         '(' => 'LEFT_PAREN',
         ')' => 'RIGHT_PAREN',
         '{' => 'LEFT_BRACE',
@@ -41,19 +48,19 @@ module Rley
         match_closest repeat
       ].map { |x| [x, x] }.to_h
-      # Constructor. Initialize a tokenizer for Lox input.
-      # @param source [String] Lox text to tokenize.
+      # Constructor. Initialize a tokenizer for RGN input.
+      # @param source [String] RGN text to tokenize.
       def initialize(source = nil)
-        @scanner = StringScanner.new('')
-        start_with(source) if source
+        reset
+        input = source || ''
+        @scanner = StringScanner.new(input)
       end
       # Reset the tokenizer and make the given text, the current input.
-      # @param source [String] Lox text to tokenize.
+      # @param source [String] RGN text to tokenize.
       def start_with(source)
+        reset
         @scanner.string = source
-        @lineno = 1
-        @line_start = 0
       end
       # Scan the source and return an array of tokens.
@@ -65,47 +72,67 @@ module Rley
           tok_sequence << token unless token.nil?
         end
-        return tok_sequence
+        tok_sequence
       end
       private
-      def _next_token
-        pos_before = scanner.pos
-        skip_intertoken_spaces
-        ws_found = true if scanner.pos > pos_before
-        curr_ch = scanner.peek(1)
-        return nil if curr_ch.nil? || curr_ch.empty?
+      def reset
+        @lineno = 1
+        @line_start = 0
+      end
+      def _next_token
         token = nil
+        ws_found = false
-        if '(){},'.include? curr_ch
-          # Single delimiter, separator or character
-          token = build_token(@@lexeme2name[curr_ch], scanner.getch)
-        elsif '?*+,'.include? curr_ch # modifier character
-          # modifiers without prefix text are symbols
-          symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
-          token = build_token(symb, scanner.getch)
-        elsif (lexeme = scanner.scan(/\.\./))
-          # One or two special character tokens
-          token = build_token(@@lexeme2name[lexeme], lexeme)
-        elsif scanner.check(/"|'/) # Start of string detected...
-          token = build_string_token
-        elsif (lexeme = scanner.scan(/\d+/))
-          token = build_token('INT_LIT', lexeme)
-        elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
-          keyw = @@keywords[lexeme.chop!]
-          token = build_token('KEY', lexeme) if keyw
-          # ... error case
-        elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
-           token = build_token('SYMBOL', lexeme)
-        else # Unknown token
-          col = scanner.pos - @line_start + 1
-          _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
-          raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
-        end
+        # Loop until end of input reached or token found
+        until token || scanner.eos?
-        return token
+          nl_found = scanner.skip(PATT_NEWLINE)
+          if nl_found
+            next_line_scanned
+            next
+          end
+          if scanner.skip(PATT_WHITESPACE) # Skip whitespaces
+            ws_found = true
+            next
+          end
+          curr_ch = scanner.peek(1)
+          if '(){},'.include? curr_ch
+            # Single delimiter, separator or character
+            token = build_token(Lexeme2name[curr_ch], scanner.getch)
+          elsif '?*+,'.include? curr_ch # modifier character
+            # modifiers without prefix text are symbols
+            symb = (ws_found || nl_found) ? 'SYMBOL' : Lexeme2name[curr_ch]
+            token = build_token(symb, scanner.getch)
+          elsif (lexeme = scanner.scan(/\.\./))
+            # One or two special character tokens
+            token = build_token(Lexeme2name[lexeme], lexeme)
+          elsif scanner.check(PATT_STRING_START) # Start of string detected...
+            token = build_string_token
+          elsif (lexeme = scanner.scan(PATT_INTEGER))
+            token = build_token('INT_LIT', lexeme)
+          elsif (lexeme = scanner.scan(PATT_KEY))
+            keyw = @@keywords[lexeme.chop!]
+            token = build_token('KEY', lexeme) if keyw
+            # ... error case
+          elsif (lexeme = scanner.scan(PATT_SYMBOL))
+             token = build_token('SYMBOL', lexeme)
+          else # Unknown token
+            col = scanner.pos - @line_start + 1
+            _erroneous = curr_ch.nil? ? '' : scanner.scan(/./)
+            raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
+          end
+          ws_found = false
+        end # until
+        # unterminated(@string_start.line, @string_start.column) if state == :multiline
+        token
+        # return token
       end
       def build_token(aSymbolName, aLexeme)
@@ -154,24 +181,8 @@ module Rley
         Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
       end
-      # Skip non-significant whitespaces and comments.
-      # Advance the scanner until something significant is found.
-      def skip_intertoken_spaces
-        loop do
-          ws_found = scanner.skip(/[ \t\f]+/) ? true : false
-          nl_found = scanner.skip(/(?:\r\n)|\r|\n/)
-          if nl_found
-            ws_found = true
-            next_line
-          end
-          break unless ws_found
-        end
-        scanner.pos
-      end
-      def next_line
+      # Event: next line detected.
+      def next_line_scanned
         @lineno += 1
         @line_start = scanner.pos
       end

data/lib/rley/syntax/grm_symbol.rb CHANGED Viewed

@@ -8,10 +8,6 @@ module Rley # This module is used as a namespace
       # @return [String] The name of the grammar symbol
       attr_reader(:name)
-      # An indicator that tells whether the grammar symbol can generate a
-      # non-empty string of terminals.
-      attr_writer(:generative)
       # Constructor.
       # aName [String] The name of the grammar symbol.
       def initialize(aName)

data/lib/rley/syntax/non_terminal.rb CHANGED Viewed

@@ -7,6 +7,10 @@ module Rley # This module is used as a namespace
     # A non-terminal symbol (sometimes called a syntactic variable) represents
     # a composition of terminal or non-terminal symbols
     class NonTerminal < GrmSymbol
+      # An indicator that tells whether the grammar symbol can generate a
+      # non-empty string of terminals.
+      attr_writer(:generative)
       # A non-terminal symbol is nullable if it can match an empty string.
       attr_writer(:nullable)