RubyGems - dhaka - Versions diffs - 2.1.0 → 2.2.0 - Mend

dhaka 2.1.0 → 2.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (39)

data/lib/evaluator/evaluator.rb +18 -17
data/lib/grammar/grammar.rb +4 -5
data/lib/lexer/dfa.rb +63 -13
data/lib/lexer/lexeme.rb +3 -4
data/lib/lexer/lexer.rb +12 -3
data/lib/lexer/lexer_run.rb +22 -10
data/lib/lexer/regex_grammar.rb +88 -14
data/lib/lexer/regex_parser.rb +1523 -1401
data/lib/lexer/specification.rb +29 -3
data/lib/lexer/state.rb +32 -9
data/lib/lexer/state_machine.rb +2 -2
data/lib/parser/channel.rb +4 -4
data/lib/parser/parser.rb +17 -12
data/lib/parser/parser_state.rb +3 -1
data/test/chittagong/chittagong_lexer.rb +63 -63
data/test/chittagong/chittagong_lexer.rb.rej +189 -0
data/test/chittagong/chittagong_lexer_specification.rb +6 -8
data/test/chittagong/chittagong_parser.rb +659 -659
data/test/chittagong/chittagong_parser.rb.rej +1623 -0
data/test/{another_lalr_but_not_slr_grammar.rb → core/another_lalr_but_not_slr_grammar.rb} +1 -1
data/test/{compiled_parser_test.rb → core/compiled_parser_test.rb} +1 -1
data/test/core/dfa_test.rb +170 -0
data/test/{evaluator_test.rb → core/evaluator_test.rb} +3 -3
data/test/{grammar_test.rb → core/grammar_test.rb} +3 -3
data/test/{lalr_but_not_slr_grammar.rb → core/lalr_but_not_slr_grammar.rb} +0 -0
data/test/core/lexer_test.rb +139 -0
data/test/{malformed_grammar.rb → core/malformed_grammar.rb} +0 -0
data/test/{malformed_grammar_test.rb → core/malformed_grammar_test.rb} +1 -1
data/test/{nullable_grammar.rb → core/nullable_grammar.rb} +0 -0
data/test/{parse_result_test.rb → core/parse_result_test.rb} +1 -1
data/test/{parser_state_test.rb → core/parser_state_test.rb} +1 -1
data/test/{parser_test.rb → core/parser_test.rb} +2 -2
data/test/{precedence_grammar.rb → core/precedence_grammar.rb} +0 -0
data/test/{precedence_grammar_test.rb → core/precedence_grammar_test.rb} +1 -1
data/test/{rr_conflict_grammar.rb → core/rr_conflict_grammar.rb} +0 -0
data/test/{simple_grammar.rb → core/simple_grammar.rb} +0 -0
data/test/{sr_conflict_grammar.rb → core/sr_conflict_grammar.rb} +0 -0
metadata +25 -22
data/test/lexer_test.rb +0 -215

data/lib/evaluator/evaluator.rb CHANGED Viewed

@@ -55,24 +55,24 @@ module Dhaka
   #
   #    end
-  class Evaluator
+  class Evaluator < SimpleDelegator
     class << self
-      # Defining evaluation rules within a block passed to this method tells the evaluator to carry out a
-      # rudimentary check of your definitions and define default evaluation rules for pass-through
-      # productions (i.e. productions with expansions consisting of exactly one grammar symbol). The
-      # default evaluation rule for such productions is to simply return the result of calling +evaluate+
-      # on the unique child node. If you neglect to define a rule for a non-pass-through production (one
-      # where the expansion consists of multiple symbols), the evaluator will raise an exception
-      # at loading time, listing all the productions that absolutely need to be defined before you can
-      # continue.
-      def define_evaluation_rules
+      # Define evaluation rules within a block passed to this method. The evaluator will define
+      # default evaluation rules for pass-through productions (i.e. productions with expansions
+      # consisting of exactly one grammar symbol). The default evaluation rule for such productions
+      # is to simply return the result of calling +evaluate+ on the unique child node. Setting the
+      # <tt>:raise_error</tt> option to true tells the evaluator to throw an exception if you neglect
+      # to define a rule for a non-pass-through production (one where the expansion consists of
+      # multiple symbols), listing all the productions that absolutely need to be defined before you
+      # can continue.
+      def define_evaluation_rules(options = {})
         yield
-        check_definitions
+        check_definitions(options)
       end
       private
-      def check_definitions
+      def check_definitions(options)
         filter = lambda {|productions| productions.map {|production| production.name} - actions}
         pass_through_productions_without_rules = filter[grammar.productions.select {|production| production.expansion.size == 1}]
         pass_through_productions_without_rules.each do |rule_name|
@@ -81,7 +81,7 @@ module Dhaka
           end
         end
         non_trivial_productions_with_rules_undefined = filter[grammar.productions.select {|production| production.expansion.size != 1}]
-        raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
+        raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty? || !options[:raise_error]
       end
       def inherited(evaluator)
@@ -107,15 +107,16 @@ module Dhaka
     # Evaluate a parse tree node.
     def evaluate node
       @node_stack ||= []
-      @node_stack << node.child_nodes
+      @node_stack << node
+      __setobj__(@node_stack.last)
       result      = send(node.production.name)
       @node_stack.pop
+      __setobj__(@node_stack.last)
       result
     end
-    # Returns the array of child nodes of the node being evaluated currently.
-    def child_nodes
-      @node_stack.last
+    def initialize
     end
   end

data/lib/grammar/grammar.rb CHANGED Viewed

@@ -131,17 +131,16 @@ module Dhaka
       end
       def closure(kernel) #:nodoc:
-        channels = Set.new
+        channels = Hash.new {|hash, start_item| hash[start_item] = Set.new}
         result = compute_closure(kernel) do |hash, item|
           if item.next_symbol and item.next_symbol.non_terminal
             productions_by_symbol[item.next_symbol].each do |production|
-              channels << spontaneous_channel(item, hash[Item.new(production, 0)])
+              new_channel = spontaneous_channel(item, hash[Item.new(production, 0)])
+              channels[item] << new_channel
             end
           end
         end
-        [channels, result]
+        [result, channels]
       end
       def passive_channel(start_item, end_item) #:nodoc:

data/lib/lexer/dfa.rb CHANGED Viewed

@@ -5,6 +5,22 @@ module Dhaka
     # in a LexerSpecification
     class InvalidRegexException < StandardError
     end
+    class CheckpointAction
+      attr_reader :pattern
+      def initialize(pattern)
+        @pattern = pattern
+      end
+      def call(lexer_run)
+        lexer_run.save_checkpoint(pattern)
+      end
+      def compile_to_ruby_source
+        "add_checkpoint(#{pattern.inspect})"
+      end
+    end
     class DFA < StateMachine #:nodoc:
       def initialize(regex)
@@ -12,7 +28,7 @@ module Dhaka
         tokenize_result = RegexTokenizer.tokenize(@regex)
         raise InvalidRegexException.new(tokenize_error_message(tokenize_result)) if tokenize_result.has_error?
         parse_result = RegexParser.parse(tokenize_result)
         raise InvalidRegexException.new(parse_error_message(parse_result)) if parse_result.has_error?
@@ -44,28 +60,62 @@ module Dhaka
       end
       def new_state_for_key key
-         accepting = key.detect {|position| position.accepting}
-         State.new(self, accepting && @regex)
+        accepting = key.detect {|position| position.accepting}
+        if accepting
+          new_state = State.new(self, accepting.action(@regex))
+        else
+          new_state = State.new(self)
+        end
+        if key.any? {|position| position.checkpoint}
+          new_state.checkpoint_actions << CheckpointAction.new(@regex)
+        end
+        new_state
       end
       def transition_characters key
         result = Set.new
         key.each do |node|
-          result << node.character unless node.accepting
+          result << node.character unless (node.accepting || node.checkpoint)
         end
         result
       end
-      def matches(string)
-        curr_state = @start_state
-        string.unpack("C*").each do |i|
-          dest_state = curr_state.transitions[i.chr]
-          return false unless dest_state
-          curr_state = dest_state
+      def match(input)
+        DFARun.new(self, input).match
+      end
+    end
+    class DFARun
+      def initialize(dfa, input)
+        @dfa, @input = dfa, input
+        @matched = ""
+        @not_yet_accepted = ""
+        @curr_state = @dfa.start_state
+      end
+      def match
+        @input.unpack("C*").each do |i|
+          break unless dest_state = @curr_state.transitions[i.chr]
+          @not_yet_accepted << i.chr
+          @curr_state = dest_state
+          @curr_state.process(self)
         end
-        return curr_state.accepting?
+        @matched
+      end
+      def save_checkpoint(pattern)
+        @last_saved_checkpoint = @matched + @not_yet_accepted
+      end
+      def accept(pattern)
+        @matched.concat @not_yet_accepted
+        @not_yet_accepted = ""
+      end
+      def accept_last_saved_checkpoint(pattern)
+        @matched = @last_saved_checkpoint
+        @not_yet_accepted = ""
       end
     end
   end
-end
+end

data/lib/lexer/lexeme.rb CHANGED Viewed

@@ -2,11 +2,10 @@ module Dhaka
   # Represents a portion of the input string that has been recognized as matching a given lexer pattern.
   class Lexeme
     # The pattern matched by this lexeme.
-    attr_accessor :pattern
+    attr_accessor :pattern, :characters
     # +input_position+ is the index in the input stream that this lexeme starts at.
     attr_reader :input_position
-    attr_reader :characters
     def initialize(input_position) #:nodoc:
       @input_position = input_position
@@ -23,11 +22,11 @@ module Dhaka
     end
     def << char #:nodoc:
-      @characters << char
+      characters << char
     end
     def concat chars #:nodoc:
-      @characters.concat chars
+      characters.concat chars
     end
   end
 end

data/lib/lexer/lexer.rb CHANGED Viewed

@@ -41,8 +41,17 @@ module Dhaka
     private
       def new_state_for_key key
-        item = key.select {|state| state.accepting?}.collect {|state| @specification.items[state.pattern]}.min
-        LexerSupport::State.new(self, item && item.pattern)
+        accepting_states = key.select {|state| state.accepting?}
+        unless accepting_states.empty?
+          highest_precedence_state = accepting_states.min {|a, b| @specification.items[a.action.pattern] <=> @specification.items[b.action.pattern]}
+          new_state = LexerSupport::State.new(self, highest_precedence_state.action)
+        else
+          new_state = LexerSupport::State.new(self)
+        end
+        key.select {|state| !state.checkpoint_actions.empty?}.each do |state|
+          new_state.checkpoint_actions.concat state.checkpoint_actions
+        end
+        new_state
       end
       def transition_characters states
@@ -58,4 +67,4 @@ module Dhaka
         result
       end
   end
-end
+end

data/lib/lexer/lexer_run.rb CHANGED Viewed

@@ -8,11 +8,12 @@ module Dhaka
       @lexer, @input          = lexer, input
       @input_position         = 0
       @not_yet_accepted_chars = []
+      @last_saved_checkpoints = {}
     end
     # Constructs a token of type +symbol_name+ from the +current_lexeme+.
-    def create_token(symbol_name)
-      Token.new(symbol_name, @current_lexeme.characters.join, @current_lexeme.input_position)
+    def create_token(symbol_name, value = current_lexeme.characters.join)
+      Token.new(symbol_name, value, current_lexeme.input_position)
     end
     # Yields each token as it is recognized. Returns a TokenizerErrorResult if an error occurs during tokenization.
@@ -29,20 +30,31 @@ module Dhaka
           reset_and_rewind
         else
           @curr_state = dest_state
-          if @curr_state.accepting?
-            @current_lexeme.pattern = @curr_state.pattern
-            @current_lexeme.concat @not_yet_accepted_chars
-            @not_yet_accepted_chars = []
-            @current_lexeme << c
-          else
-            @not_yet_accepted_chars << c
-          end
+          @not_yet_accepted_chars << c
+          @curr_state.process(self)
           advance
         end
       end
       yield Token.new(END_SYMBOL_NAME, nil, nil)
     end
+    def accept(pattern) #:nodoc:
+      @current_lexeme.pattern = pattern
+      @current_lexeme.concat @not_yet_accepted_chars
+      @not_yet_accepted_chars = []
+    end
+    def save_checkpoint(pattern) #:nodoc:
+      @last_saved_checkpoints[pattern] = (@current_lexeme.characters + @not_yet_accepted_chars)
+    end
+    def accept_last_saved_checkpoint(pattern) #:nodoc:
+      @current_lexeme.pattern = pattern
+      @current_lexeme.concat @not_yet_accepted_chars
+      @not_yet_accepted_chars = @current_lexeme.characters[(@last_saved_checkpoints[pattern].size)..-1]
+      @current_lexeme.characters = @last_saved_checkpoints[pattern].dup
+    end
     private
       def reset_and_rewind
         @input_position -= @not_yet_accepted_chars.size

data/lib/lexer/regex_grammar.rb CHANGED Viewed

@@ -4,7 +4,7 @@ module Dhaka
     LOWERCASE_LETTERS       = ('a'..'z').to_a
     UPPERCASE_LETTERS       = ('A'..'Z').to_a
     LETTERS                 = LOWERCASE_LETTERS + UPPERCASE_LETTERS
-    WHITESPACE              = [" ", "\n", "\t"]
+    WHITESPACE              = [" ", "\r", "\n", "\t"]
     SYMBOLS                 = %w| ~ ` ! @ # % & _ = : ; " ' < , > - |
     CLASSES                 = {'d' => DIGITS, 'w' => LETTERS, 's' => WHITESPACE}
@@ -22,9 +22,10 @@ module Dhaka
     class RegexGrammar < Dhaka::Grammar
       for_symbol(Dhaka::START_SYMBOL_NAME) do
-        regex                         %w| Disjunction |                         do RootNode.new(child_nodes[0]) end
-      end
+        regex                         %w| Disjunction |                         do RootNode.new(child_nodes[0], AcceptingNode.new) end
+        regex_with_lookahead          %w| Disjunction / Disjunction |           do RootNode.new(LookaheadNode.new(child_nodes[0], child_nodes[2]), LookaheadAcceptingNode.new) end
+      end
       for_symbol('Disjunction') do
         disjunction                   %w| Alternative \| Disjunction |          do OrNode.new(child_nodes[0], child_nodes[2]) end
         alternative                   %w| Alternative |                         do child_nodes[0] end
@@ -45,7 +46,7 @@ module Dhaka
       for_symbol('Atom') do
         group                         %w| ( Disjunction ) |                     do child_nodes[1] end
         char                          %w| Character |                           do LeafNode.new(child_nodes[0]) end
-        anything                      %w| . |                                   do OrNode.new(*(ALL_CHARACTERS - ["\n"]).collect {|char| LeafNode.new(char)}) end
+        anything                      %w| . |                                   do OrNode.new(*(ALL_CHARACTERS - ["\r", "\n"]).collect {|char| LeafNode.new(char)}) end
         positive_set                  %w| [ SetContents ] |                     do OrNode.new(*child_nodes[1].collect{|char| LeafNode.new(char)}) end
         negative_set                  %w| [ ^ SetContents ] |                   do OrNode.new(*(ALL_CHARACTERS - child_nodes[2]).collect {|char| LeafNode.new(char)}) end
@@ -145,6 +146,10 @@ module Dhaka
     class ASTNode
+      def checkpoint
+        false
+      end
       def accepting
         false
       end
@@ -184,15 +189,19 @@ module Dhaka
       end
       def first
-        children.inject(Set.new([])) do |result, child|
-          result | child.first
+        result = Set.new
+        children.each do |child|
+          result.merge child.first
         end
+        result
       end
       def last
-        children.inject(Set.new([])) do |result, child|
-          result | child.last
+        result = Set.new
+        children.each do |child|
+          result.merge child.last
         end
+        result
       end
       def to_dot(graph)
@@ -234,6 +243,19 @@ module Dhaka
         end
       end
     end
+    class LookaheadNode < CatNode
+      def label
+        "/"
+      end
+      def calculate_follow_sets
+        super
+        left.last.each do |leaf_node|
+          leaf_node.follow_set.merge(Set.new([CheckpointNode.new]))
+        end
+      end
+    end
     class UnaryNode < ASTNode
       attr_reader :child
@@ -265,10 +287,6 @@ module Dhaka
     end
     class RootNode < CatNode
-      def initialize(left)
-        super(left, AcceptingNode.new())
-      end
       def label
         "start"
       end
@@ -344,6 +362,19 @@ module Dhaka
       def calculate_follow_sets
       end
     end
+    class CheckpointNode < ASTNode
+      def to_dot(graph)
+        graph.node(self, :label => "lookahead")
+      end
+      def character
+      end
+      def checkpoint
+        true
+      end
+    end
     class AcceptingNode < ASTNode
       def accepting
@@ -352,6 +383,10 @@ module Dhaka
       def character
       end
+      def action(pattern)
+        AcceptAction.new(pattern)
+      end
       def first
         Set.new([self])
@@ -364,5 +399,44 @@ module Dhaka
         graph.node(self, :label => '#')
       end
     end
+    class LookaheadAcceptingNode < AcceptingNode
+      def action(pattern)
+        LookaheadAcceptAction.new(pattern)
+      end
+    end
+    class AcceptAction
+      attr_reader :pattern
+      def initialize(pattern)
+        @pattern = pattern
+      end
+      def call(lexer_run)
+        lexer_run.accept(pattern)
+      end
+      def compile_to_ruby_source
+        "accept(#{pattern.inspect})"
+      end
+      def to_dot
+        "Accept #{pattern.inspect}"
+      end
+    end
+    class LookaheadAcceptAction < AcceptAction
+      def call(lexer_run)
+          lexer_run.accept_last_saved_checkpoint(pattern)
+      end
+      def compile_to_ruby_source
+        "accept_with_lookahead(#{pattern.inspect})"
+      end
+      def to_dot
+        "Accept With Lookahead #{pattern.inspect}"
+      end
+    end
   end
-end
+end