RubyGems - dhaka - Versions diffs - 2.1.0 → 2.2.0 - Mend

dhaka 2.1.0 → 2.2.0

Files changed (39)

data/lib/evaluator/evaluator.rb +18 -17
data/lib/grammar/grammar.rb +4 -5
data/lib/lexer/dfa.rb +63 -13
data/lib/lexer/lexeme.rb +3 -4
data/lib/lexer/lexer.rb +12 -3
data/lib/lexer/lexer_run.rb +22 -10
data/lib/lexer/regex_grammar.rb +88 -14
data/lib/lexer/regex_parser.rb +1523 -1401
data/lib/lexer/specification.rb +29 -3
data/lib/lexer/state.rb +32 -9
data/lib/lexer/state_machine.rb +2 -2
data/lib/parser/channel.rb +4 -4
data/lib/parser/parser.rb +17 -12
data/lib/parser/parser_state.rb +3 -1
data/test/chittagong/chittagong_lexer.rb +63 -63
data/test/chittagong/chittagong_lexer.rb.rej +189 -0
data/test/chittagong/chittagong_lexer_specification.rb +6 -8
data/test/chittagong/chittagong_parser.rb +659 -659
data/test/chittagong/chittagong_parser.rb.rej +1623 -0
data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
data/test/core/dfa_test.rb +170 -0
data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
data/test/core/lexer_test.rb +139 -0
data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
data/test/{parser_test.rb → core/parser_test.rb} +2 -2
data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
metadata +25 -22
data/test/lexer_test.rb +0 -215

data/lib/evaluator/evaluator.rb CHANGED Viewed

@@ -55,24 +55,24 @@ module Dhaka
   #
   #    end
-  class Evaluator
+  class Evaluator < SimpleDelegator
     class << self
-      # Defining evaluation rules within a block passed to this method tells the evaluator to carry out a
-      # rudimentary check of your definitions and define default evaluation rules for pass-through
-      # productions (i.e. productions with expansions consisting of exactly one grammar symbol). The
-      # default evaluation rule for such productions is to simply return the result of calling +evaluate+
-      # on the unique child node. If you neglect to define a rule for a non-pass-through production (one
-      # where the expansion consists of multiple symbols), the evaluator will raise an exception
-      # at loading time, listing all the productions that absolutely need to be defined before you can
-      # continue.
-      def define_evaluation_rules
+      # Define evaluation rules within a block passed to this method. The evaluator will define
+      # default evaluation rules for pass-through productions (i.e. productions with expansions
+      # consisting of exactly one grammar symbol). The default evaluation rule for such productions
+      # is to simply return the result of calling +evaluate+ on the unique child node. Setting the
+      # <tt>:raise_error</tt> option to true tells the evaluator to throw an exception if you neglect
+      # to define a rule for a non-pass-through production (one where the expansion consists of
+      # multiple symbols), listing all the productions that absolutely need to be defined before you
+      # can continue.
+      def define_evaluation_rules(options = {})
         yield
-        check_definitions
+        check_definitions(options)
       end
       private
-      def check_definitions
+      def check_definitions(options)
         filter = lambda {|productions| productions.map {|production| production.name} - actions}
         pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
         pass_through_productions_without_rules.each do |rule_name|
@@ -81,7 +81,7 @@ module Dhaka
           end
         end
         non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
-        raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
+        raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty? || !options[:raise_error]
       end
       def inherited(evaluator)
@@ -107,15 +107,16 @@ module Dhaka
     # Evaluate a parse tree node.
     def evaluate node
       @node_stack ||= []
-      @node_stack << node.child_nodes
+      @node_stack << node
+      __setobj__(@node_stack.last)
       result      = send(node.production.name)
       @node_stack.pop
+      __setobj__(@node_stack.last)
       result
     end
-    # Returns the array of child nodes of the node being evaluated currently.
-    def child_nodes
-      @node_stack.last
+    def initialize
     end
   end

data/lib/grammar/grammar.rb CHANGED Viewed

@@ -131,17 +131,16 @@ module Dhaka
       end
       def closure(kernel) #:nodoc:
-        channels = Set.new
+        channels = Hash.new {|hash, start_item| hash[start_item] = Set.new}
         result = compute_closure(kernel) do |hash, item|
           if item.next_symbol and item.next_symbol.non_terminal
             productions_by_symbol[item.next_symbol].each do |production|
-              channels << spontaneous_channel(item, hash[Item.new(production, 0)])
+              new_channel = spontaneous_channel(item, hash[Item.new(production, 0)])
+              channels[item] << new_channel
             end
           end
         end
-        [channels, result]
+        [result, channels]
       end
       def passive_channel(start_item, end_item) #:nodoc:

data/lib/lexer/dfa.rb CHANGED Viewed

@@ -5,6 +5,22 @@ module Dhaka
     # in a LexerSpecification
     class InvalidRegexException < StandardError
     end
+    class CheckpointAction
+      attr_reader :pattern
+      def initialize(pattern)
+        @pattern = pattern
+      end
+      def call(lexer_run)
+        lexer_run.save_checkpoint(pattern)
+      end
+      def compile_to_ruby_source
+        "add_checkpoint(#{pattern.inspect})"
+      end
+    end
     class DFA < StateMachine #:nodoc:
       def initialize(regex)
@@ -12,7 +28,7 @@ module Dhaka
         tokenize_result = RegexTokenizer.tokenize(@regex)
         raise InvalidRegexException.new(tokenize_error_message(tokenize_result)) if tokenize_result.has_error?
         parse_result = RegexParser.parse(tokenize_result)
         raise InvalidRegexException.new(parse_error_message(parse_result)) if parse_result.has_error?
@@ -44,28 +60,62 @@ module Dhaka
       end
       def new_state_for_key key
-         accepting = key.detect {|position| position.accepting}
-         State.new(self, accepting && @regex)
+        accepting = key.detect {|position| position.accepting}
+        if accepting
+          new_state = State.new(self, accepting.action(@regex))
+        else
+          new_state = State.new(self)
+        end
+        if key.any? {|position| position.checkpoint}
+          new_state.checkpoint_actions << CheckpointAction.new(@regex)
+        end
+        new_state
       end
       def transition_characters key
         result = Set.new
         key.each do |node|
-          result << node.character unless node.accepting
+          result << node.character unless (node.accepting || node.checkpoint)
         end
         result
       end
-      def matches(string)
-        curr_state = @start_state
-        string.unpack("C*").each do |i|
-          dest_state = curr_state.transitions[i.chr]
-          return false unless dest_state
-          curr_state = dest_state
+      def match(input)
+        DFARun.new(self, input).match
+      end
+    end
+    class DFARun
+      def initialize(dfa, input)
+        @dfa, @input = dfa, input
+        @matched = ""
+        @not_yet_accepted = ""
+        @curr_state = @dfa.start_state
+      end
+      def match
+        @input.unpack("C*").each do |i|
+          break unless dest_state = @curr_state.transitions[i.chr]
+          @not_yet_accepted << i.chr
+          @curr_state = dest_state
+          @curr_state.process(self)
         end
-        return curr_state.accepting?
+        @matched
+      end
+      def save_checkpoint(pattern)
+        @last_saved_checkpoint = @matched + @not_yet_accepted
+      end
+      def accept(pattern)
+        @matched.concat @not_yet_accepted
+        @not_yet_accepted = ""
+      end
+      def accept_last_saved_checkpoint(pattern)
+        @matched = @last_saved_checkpoint
+        @not_yet_accepted = ""
       end
     end
   end
-end
+end

data/lib/lexer/lexeme.rb CHANGED Viewed

@@ -2,11 +2,10 @@ module Dhaka
   # Represents a portion of the input string that has been recognized as matching a given lexer pattern.
   class Lexeme
     # The pattern matched by this lexeme.
-    attr_accessor :pattern
+    attr_accessor :pattern, :characters
     # +input_position+ is the index in the input stream that this lexeme starts at.
     attr_reader :input_position
-    attr_reader :characters
     def initialize(input_position) #:nodoc:
       @input_position = input_position
@@ -23,11 +22,11 @@ module Dhaka
     end
     def << char #:nodoc:
-      @characters << char
+      characters << char
     end
     def concat chars #:nodoc:
-      @characters.concat chars
+      characters.concat chars
     end
   end
 end

data/lib/lexer/lexer.rb CHANGED Viewed

@@ -41,8 +41,17 @@ module Dhaka
     private
       def new_state_for_key key
-        item = key.select {|state| state.accepting?}.collect {|state| @specification.items[state.pattern]}.min
-        LexerSupport::State.new(self, item && item.pattern)
+        accepting_states = key.select {|state| state.accepting?}
+        unless accepting_states.empty?
+          highest_precedence_state = accepting_states.min {|a, b| @specification.items[a.action.pattern] <=> @specification.items[b.action.pattern]}
+          new_state = LexerSupport::State.new(self, highest_precedence_state.action)
+        else
+          new_state = LexerSupport::State.new(self)
+        end
+        key.select {|state| !state.checkpoint_actions.empty?}.each do |state|
+          new_state.checkpoint_actions.concat state.checkpoint_actions
+        end
+        new_state
       end
       def transition_characters states
@@ -58,4 +67,4 @@ module Dhaka
         result
       end
   end
-end
+end

data/lib/lexer/lexer_run.rb CHANGED Viewed

@@ -8,11 +8,12 @@ module Dhaka
       @lexer, @input          = lexer, input
       @input_position         = 0
       @not_yet_accepted_chars = []
+      @last_saved_checkpoints = {}
     end
     # Constructs a token of type +symbol_name+ from the +current_lexeme+.
-    def create_token(symbol_name)
-      Token.new(symbol_name, @current_lexeme.characters.join, @current_lexeme.input_position)
+    def create_token(symbol_name, value = current_lexeme.characters.join)
+      Token.new(symbol_name, value, current_lexeme.input_position)
     end
     # Yields each token as it is recognized. Returns a TokenizerErrorResult if an error occurs during tokenization.
@@ -29,20 +30,31 @@ module Dhaka
           reset_and_rewind
         else
           @curr_state = dest_state
-          if @curr_state.accepting?
-            @current_lexeme.pattern = @curr_state.pattern
-            @current_lexeme.concat @not_yet_accepted_chars
-            @not_yet_accepted_chars = []
-            @current_lexeme << c
-          else
-            @not_yet_accepted_chars << c
-          end
+          @not_yet_accepted_chars << c
+          @curr_state.process(self)
           advance
         end
       end
       yield Token.new(END_SYMBOL_NAME, nil, nil)
     end
+    def accept(pattern) #:nodoc:
+      @current_lexeme.pattern = pattern
+      @current_lexeme.concat @not_yet_accepted_chars
+      @not_yet_accepted_chars = []
+    end
+    def save_checkpoint(pattern) #:nodoc:
+      @last_saved_checkpoints[pattern] = (@current_lexeme.characters + @not_yet_accepted_chars)
+    end
+    def accept_last_saved_checkpoint(pattern) #:nodoc:
+      @current_lexeme.pattern = pattern
+      @current_lexeme.concat @not_yet_accepted_chars
+      @not_yet_accepted_chars = @current_lexeme.characters[(@last_saved_checkpoints[pattern].size)..-1]
+      @current_lexeme.characters = @last_saved_checkpoints[pattern].dup
+    end
     private
       def reset_and_rewind
         @input_position -= @not_yet_accepted_chars.size

data/lib/lexer/regex_grammar.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Dhaka
     LOWERCASE_LETTERS       = ('a'..'z').to_a
     UPPERCASE_LETTERS       = ('A'..'Z').to_a
     LETTERS                 = LOWERCASE_LETTERS + UPPERCASE_LETTERS
-    WHITESPACE              = [" ", "\n", "\t"]
+    WHITESPACE              = [" ", "\r", "\n", "\t"]
     SYMBOLS                 = %w| ~ ` ! @ # % & _ = : ; " ' < , > - |
     CLASSES                 = {'d' => DIGITS, 'w' => LETTERS, 's' => WHITESPACE}
@@ -22,9 +22,10 @@ module Dhaka
     class RegexGrammar < Dhaka::Grammar
       for_symbol(Dhaka::START_SYMBOL_NAME) do
-        regex                         %w| Disjunction |                         do RootNode.new(child_nodes[0]) end
-      end
+        regex                         %w| Disjunction |                         do RootNode.new(child_nodes[0], AcceptingNode.new) end
+        regex_with_lookahead          %w| Disjunction / Disjunction |           do RootNode.new(LookaheadNode.new(child_nodes[0], child_nodes[2]), LookaheadAcceptingNode.new) end
+      end
       for_symbol('Disjunction') do
         disjunction                   %w| Alternative \| Disjunction |          do OrNode.new(child_nodes[0], child_nodes[2]) end
         alternative                   %w| Alternative |                         do child_nodes[0] end
@@ -45,7 +46,7 @@ module Dhaka
       for_symbol('Atom') do
         group                         %w| ( Disjunction ) |                     do child_nodes[1] end
         char                          %w| Character |                           do LeafNode.new(child_nodes[0]) end
-        anything                      %w| . |                                   do OrNode.new(*(ALL_CHARACTERS - ["\n"]).collect {|char| LeafNode.new(char)}) end
+        anything                      %w| . |                                   do OrNode.new(*(ALL_CHARACTERS - ["\r", "\n"]).collect {|char| LeafNode.new(char)}) end
         positive_set                  %w| [ SetContents ] |                     do OrNode.new(*child_nodes[1].collect{|char| LeafNode.new(char)}) end
         negative_set                  %w| [ ^ SetContents ] |                   do OrNode.new(*(ALL_CHARACTERS - child_nodes[2]).collect {|char| LeafNode.new(char)}) end
@@ -145,6 +146,10 @@ module Dhaka
     class ASTNode
+      def checkpoint
+        false
+      end
       def accepting
         false
       end
@@ -184,15 +189,19 @@ module Dhaka
       end
       def first
-        children.inject(Set.new([])) do |result, child|
-          result | child.first
+        result = Set.new
+        children.each do |child|
+          result.merge child.first
         end
+        result
       end
       def last
-        children.inject(Set.new([])) do |result, child|
-          result | child.last
+        result = Set.new
+        children.each do |child|
+          result.merge child.last
         end
+        result
       end
       def to_dot(graph)
@@ -234,6 +243,19 @@ module Dhaka
         end
       end
     end
+    class LookaheadNode < CatNode
+      def label
+        "/"
+      end
+      def calculate_follow_sets
+        super
+        left.last.each do |leaf_node|
+          leaf_node.follow_set.merge(Set.new([CheckpointNode.new]))
+        end
+      end
+    end
     class UnaryNode < ASTNode
       attr_reader :child
@@ -265,10 +287,6 @@ module Dhaka
     end
     class RootNode < CatNode
-      def initialize(left)
-        super(left, AcceptingNode.new())
-      end
       def label
         "start"
       end
@@ -344,6 +362,19 @@ module Dhaka
       def calculate_follow_sets
       end
     end
+    class CheckpointNode < ASTNode
+      def to_dot(graph)
+        graph.node(self, :label => "lookahead")
+      end
+      def character
+      end
+      def checkpoint
+        true
+      end
+    end
     class AcceptingNode < ASTNode
       def accepting
@@ -352,6 +383,10 @@ module Dhaka
       def character
       end
+      def action(pattern)
+        AcceptAction.new(pattern)
+      end
       def first
         Set.new([self])
@@ -364,5 +399,44 @@ module Dhaka
         graph.node(self, :label => '#')
       end
     end
+    class LookaheadAcceptingNode < AcceptingNode
+      def action(pattern)
+        LookaheadAcceptAction.new(pattern)
+      end
+    end
+    class AcceptAction
+      attr_reader :pattern
+      def initialize(pattern)
+        @pattern = pattern
+      end
+      def call(lexer_run)
+        lexer_run.accept(pattern)
+      end
+      def compile_to_ruby_source
+        "accept(#{pattern.inspect})"
+      end
+      def to_dot
+        "Accept #{pattern.inspect}"
+      end
+    end
+    class LookaheadAcceptAction < AcceptAction
+      def call(lexer_run)
+          lexer_run.accept_last_saved_checkpoint(pattern)
+      end
+      def compile_to_ruby_source
+        "accept_with_lookahead(#{pattern.inspect})"
+      end
+      def to_dot
+        "Accept With Lookahead #{pattern.inspect}"
+      end
+    end
   end
-end
+end