simply_stored 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/simply_stored/class_methods_base.rb +31 -0
- data/lib/simply_stored/couch/belongs_to.rb +117 -0
- data/lib/simply_stored/couch/ext/couch_potato.rb +16 -0
- data/lib/simply_stored/couch/has_many.rb +148 -0
- data/lib/simply_stored/couch/has_one.rb +93 -0
- data/lib/simply_stored/couch/validations.rb +74 -0
- data/lib/simply_stored/couch/views/array_property_view_spec.rb +22 -0
- data/lib/simply_stored/couch/views.rb +1 -0
- data/lib/simply_stored/couch.rb +278 -0
- data/lib/simply_stored/instance_methods.rb +143 -0
- data/lib/simply_stored/simpledb/associations.rb +196 -0
- data/lib/simply_stored/simpledb/attributes.rb +173 -0
- data/lib/simply_stored/simpledb/storag.rb +85 -0
- data/lib/simply_stored/simpledb/validations.rb +88 -0
- data/lib/simply_stored/simpledb.rb +212 -0
- data/lib/simply_stored/storage.rb +93 -0
- data/lib/simply_stored.rb +9 -0
- data/test/custom_views_test.rb +33 -0
- data/test/fixtures/couch.rb +182 -0
- data/test/fixtures/simpledb/item.rb +11 -0
- data/test/fixtures/simpledb/item_daddy.rb +8 -0
- data/test/fixtures/simpledb/log_item.rb +3 -0
- data/test/fixtures/simpledb/namespace_bar.rb +5 -0
- data/test/fixtures/simpledb/namespace_foo.rb +7 -0
- data/test/fixtures/simpledb/protected_item.rb +3 -0
- data/test/simply_stored_couch_test.rb +1684 -0
- data/test/simply_stored_simpledb_test.rb +1341 -0
- data/test/test_helper.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/dot/dot.rb +29 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/evaluator/evaluator.rb +133 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/closure_hash.rb +15 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar.rb +240 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/grammar_symbol.rb +27 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/precedence.rb +19 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/grammar/production.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/accept_actions.rb +36 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/alphabet.rb +21 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/compiled_lexer.rb +46 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/dfa.rb +121 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexeme.rb +32 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer.rb +70 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/lexer_run.rb +78 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_grammar.rb +392 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_parser.rb +2010 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/regex_tokenizer.rb +14 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/specification.rb +96 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state.rb +68 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/lexer/state_machine.rb +37 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/action.rb +55 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/channel.rb +58 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/compiled_parser.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/conflict.rb +54 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/item.rb +42 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_result.rb +50 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parse_tree.rb +66 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser.rb +165 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_methods.rb +11 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_run.rb +39 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/parser_state.rb +74 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/parser/token.rb +22 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/runtime.rb +51 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka/tokenizer/tokenizer.rb +190 -0
- data/test/vendor/dhaka-2.2.1/lib/dhaka.rb +62 -0
- data/test/vendor/dhaka-2.2.1/test/all_tests.rb +5 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator.rb +64 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_evaluator_test.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar.rb +41 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_test_methods.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer.rb +39 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic/arithmetic_tokenizer_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_evaluator.rb +43 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_grammar_test.rb +30 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_lexer_specification.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/arithmetic_precedence/arithmetic_precedence_parser_test.rb +33 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_grammar.rb +23 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/bracket_tokenizer.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/brackets/brackets_test.rb +28 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver.rb +46 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_driver_test.rb +276 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator.rb +284 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_evaluator_test.rb +38 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_grammar.rb +104 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer.rb +109 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_specification.rb +37 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_lexer_test.rb +58 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser.rb +879 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_parser_test.rb +55 -0
- data/test/vendor/dhaka-2.2.1/test/chittagong/chittagong_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/another_lalr_but_not_slr_grammar.rb +20 -0
- data/test/vendor/dhaka-2.2.1/test/core/compiled_parser_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/dfa_test.rb +170 -0
- data/test/vendor/dhaka-2.2.1/test/core/evaluator_test.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/grammar_test.rb +83 -0
- data/test/vendor/dhaka-2.2.1/test/core/lalr_but_not_slr_grammar.rb +19 -0
- data/test/vendor/dhaka-2.2.1/test/core/lexer_test.rb +139 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar.rb +7 -0
- data/test/vendor/dhaka-2.2.1/test/core/malformed_grammar_test.rb +8 -0
- data/test/vendor/dhaka-2.2.1/test/core/nullable_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/parse_result_test.rb +44 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_state_test.rb +24 -0
- data/test/vendor/dhaka-2.2.1/test/core/parser_test.rb +131 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/core/precedence_grammar_test.rb +9 -0
- data/test/vendor/dhaka-2.2.1/test/core/rr_conflict_grammar.rb +21 -0
- data/test/vendor/dhaka-2.2.1/test/core/simple_grammar.rb +22 -0
- data/test/vendor/dhaka-2.2.1/test/core/sr_conflict_grammar.rb +16 -0
- data/test/vendor/dhaka-2.2.1/test/dhaka_test_helper.rb +17 -0
- data/test/vendor/dhaka-2.2.1/test/fake_logger.rb +17 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/client_exception.rb +10 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/db.rb +146 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/query_language.rb +266 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/server.rb +33 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb/servlet.rb +191 -0
- data/test/vendor/simplerdb-0.2/lib/simplerdb.rb +3 -0
- data/test/vendor/simplerdb-0.2/test/functional_test.rb +81 -0
- data/test/vendor/simplerdb-0.2/test/query_evaluator_test.rb +73 -0
- data/test/vendor/simplerdb-0.2/test/query_parser_test.rb +64 -0
- data/test/vendor/simplerdb-0.2/test/simplerdb_test.rb +80 -0
- metadata +182 -0
@@ -0,0 +1,121 @@
|
|
1
|
+
module Dhaka
  module LexerSupport
    # Raised when an invalid regular expression pattern is encountered
    # in a LexerSpecification.
    class InvalidRegexException < StandardError
    end
  end
end
|
8
|
+
|
9
|
+
module Dhaka
  module LexerSupport
    # State action that records the current match position as a
    # checkpoint for a lookahead pattern, so a lexer run can later
    # rewind to it when the lookahead portion is discarded.
    class CheckpointAction
      attr_reader :pattern

      def initialize(pattern)
        @pattern = pattern
      end

      # Invoked when a run enters a state carrying this action.
      def call(lexer_run)
        lexer_run.save_checkpoint(pattern)
      end

      # Ruby source fragment emitted when compiling a lexer to code.
      def compile_to_ruby_source
        "add_checkpoint(#{pattern.inspect})"
      end
    end
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
module Dhaka
  module LexerSupport
    # Deterministic finite automaton built from a single regular
    # expression, using the follow-set (followpos) construction over the
    # parsed regex AST. Inherits the subset-construction driver from
    # StateMachine (defined elsewhere in the library).
    class DFA < StateMachine #:nodoc:
      # Tokenizes and parses +regex+, computes follow sets on the AST,
      # and seeds the state machine with the AST's first-set.
      # Raises InvalidRegexException if the regex cannot be tokenized
      # or parsed.
      def initialize(regex)
        @regex = regex

        tokenize_result = RegexTokenizer.tokenize(@regex)
        raise InvalidRegexException.new(tokenize_error_message(tokenize_result)) if tokenize_result.has_error?

        parse_result = RegexParser.parse(tokenize_result)
        raise InvalidRegexException.new(parse_error_message(parse_result)) if parse_result.has_error?

        # The parse result doubles as the root of the regex AST.
        ast = parse_result
        ast.calculate_follow_sets

        super(ItemSet.new(ast.first))
      end

      # Human-readable message pointing at the offending character.
      def tokenize_error_message(tokenize_result)
        index = tokenize_result.unexpected_char_index
        "Invalid character #{@regex[index].chr}: #{@regex.dup.insert(index, '>>>')}"
      end

      # Message for an unexpected token (or premature end) during parsing.
      def parse_error_message(parse_result)
        unexpected_token = parse_result.unexpected_token
        if unexpected_token.symbol_name == END_SYMBOL_NAME
          "Unexpected end of regex."
        else
          "Unexpected token #{unexpected_token.symbol_name}: #{@regex.dup.insert(unexpected_token.input_position, '>>>')}"
        end
      end

      # Union of the follow sets of every position in +key+ labelled +char+.
      def dest_key_for key, char
        destination = ItemSet.new
        key.each do |position|
          destination.merge(position.follow_set) if position.character == char
        end
        destination
      end

      # Builds the State for a new item-set key: attaches an accept
      # action when the key contains an accepting position, and a
      # checkpoint action when it contains a lookahead checkpoint.
      def new_state_for_key key
        accepting = key.detect { |position| position.accepting }
        state = accepting ? State.new(self, accepting.action(@regex)) : State.new(self)
        state.checkpoint_actions << CheckpointAction.new(@regex) if key.any? { |position| position.checkpoint }
        state
      end

      # Characters on which transitions leave the item set +key+
      # (accepting and checkpoint positions carry no character).
      def transition_characters key
        chars = Set.new
        key.each do |node|
          chars << node.character unless (node.accepting || node.checkpoint)
        end
        chars
      end

      # Runs this DFA against +input+, returning the matched prefix.
      def match(input)
        DFARun.new(self, input).match
      end
    end
  end
end
|
87
|
+
|
88
|
+
module Dhaka
  module LexerSupport
    # A single execution of a DFA against an input string. The accepted
    # prefix is tracked separately from characters that have been
    # consumed but not yet accepted, so lookahead checkpoints can rewind.
    class DFARun
      def initialize(dfa, input)
        @dfa, @input = dfa, input
        @matched = ""
        @not_yet_accepted = ""
        @curr_state = @dfa.start_state
      end

      # Walks the input byte by byte until no transition applies and
      # returns the longest accepted match.
      def match
        @input.unpack("C*").each do |byte|
          char = byte.chr
          dest_state = @curr_state.transitions[char]
          break unless dest_state
          @not_yet_accepted << char
          @curr_state = dest_state
          @curr_state.process(self)
        end
        @matched
      end

      # Remembers everything consumed so far as the checkpoint for +pattern+.
      def save_checkpoint(pattern)
        @last_saved_checkpoint = @matched + @not_yet_accepted
      end

      # Promotes all pending characters into the accepted match.
      def accept(pattern)
        @matched.concat @not_yet_accepted
        @not_yet_accepted = ""
      end

      # Rolls the accepted match back to the last saved checkpoint.
      def accept_last_saved_checkpoint(pattern)
        @matched = @last_saved_checkpoint
        @not_yet_accepted = ""
      end
    end
  end
end
|
120
|
+
end
|
121
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Dhaka
  # Represents a portion of the input string that has been recognized as
  # matching a given lexer pattern.
  class Lexeme
    # The pattern matched by this lexeme, and the characters consumed so far.
    attr_accessor :pattern, :characters

    # +input_position+ is the index in the input stream that this lexeme starts at.
    attr_reader :input_position

    def initialize(input_position) #:nodoc:
      @input_position = input_position
      @characters = []
    end

    # The substring of the input stream that this lexeme is comprised of.
    def value
      characters.join
    end

    # A lexeme counts as accepted once a pattern has been assigned to it.
    def accepted? #:nodoc:
      pattern
    end

    def << char #:nodoc:
      characters << char
    end

    def concat chars #:nodoc:
      characters.concat chars
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Dhaka
  # The lexer generator. To generate a lexer from a lexer specification +MyLexerSpecification+:
  #   lexer = Dhaka::Lexer.new(MyLexerSpecification)
  #
  # To compile this lexer as +MyLexer+ to a string of Ruby source:
  #   lexer.compile_to_ruby_source_as(:MyLexer)
  class Lexer < LexerSupport::StateMachine
    attr_reader :specification

    # Creates a new lexer from a given specification by building one DFA
    # per pattern and merging their start states via subset construction.
    def initialize(specification)
      dfas = {}
      @specification = specification
      specification.items.each do |pattern, item|
        dfas[pattern] = LexerSupport::DFA.new(pattern)
      end
      super(ItemSet.new(dfas.values.collect { |dfa| dfa.start_state }))
    end

    # Compiles the lexer to Ruby code that, when executed, reloads all the
    # states and actions of the lexer into a class named +lexer_class_name+.
    def compile_to_ruby_source_as lexer_class_name
      source = "class #{lexer_class_name} < Dhaka::CompiledLexer\n\n"
      source << " self.specification = #{specification.name}\n\n"
      source << " start_with #{start_state.object_id}\n\n"
      @states.each do |key, state|
        source << "#{state.compile_to_ruby_source}\n\n"
      end
      source << "end"
      source
    end

    # Returns a LexerRun that tokenizes +input+.
    def lex input
      LexerRun.new(self, input)
    end

    def action_for_pattern pattern #:nodoc:
      @specification.items[pattern].action
    end

    private

    # Builds the merged lexer state for a set of DFA states. When several
    # patterns accept here, the one whose specification item compares
    # lowest (highest precedence) wins; checkpoint actions are unioned.
    def new_state_for_key key
      accepting_states = key.select { |state| state.accepting? }
      if accepting_states.empty?
        merged = LexerSupport::State.new(self)
      else
        highest_precedence_state = accepting_states.min { |a, b| @specification.items[a.action.pattern] <=> @specification.items[b.action.pattern] }
        merged = LexerSupport::State.new(self, highest_precedence_state.action)
      end
      key.select { |state| !state.checkpoint_actions.empty? }.each do |state|
        merged.checkpoint_actions.concat state.checkpoint_actions
      end
      merged
    end

    # All characters on which any constituent DFA state can transition.
    def transition_characters states
      states.collect { |state| state.transitions.keys }.flatten.uniq
    end

    # Set of destination DFA states reachable from +states+ on +char+.
    def dest_key_for states, char
      destinations = ItemSet.new
      states.each do |state|
        dest_state = state.transitions[char]
        destinations << dest_state if dest_state
      end
      destinations
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module Dhaka
  # Represents a run of a lexer on a given input string.
  class LexerRun
    include Enumerable

    attr_reader :current_lexeme

    def initialize lexer, input
      @lexer, @input = lexer, input
      @input_position = 0
      @not_yet_accepted_chars = []
      @last_saved_checkpoints = {}
    end

    # Constructs a token of type +symbol_name+ from the +current_lexeme+.
    def create_token(symbol_name, value = current_lexeme.characters.join)
      Token.new(symbol_name, value, current_lexeme.input_position)
    end

    # Yields each token as it is recognized. Returns a
    # TokenizerErrorResult if an error occurs during tokenization.
    def each
      reset_and_rewind
      loop do
        c = curr_char
        # Stop once the input is exhausted and nothing is pending.
        break if (c == "\0" && @not_yet_accepted_chars.empty? && !@current_lexeme.accepted?)
        dest_state = @curr_state.transitions[c]
        if dest_state
          @curr_state = dest_state
          @not_yet_accepted_chars << c
          @curr_state.process(self)
          advance
        else
          # Dead end: emit the lexeme accepted so far, or fail.
          return TokenizerErrorResult.new(@input_position) unless @current_lexeme.accepted?
          token = get_token
          yield token if token
          reset_and_rewind
        end
      end
      yield Token.new(END_SYMBOL_NAME, nil, nil)
    end

    def accept(pattern) #:nodoc:
      @current_lexeme.pattern = pattern
      @current_lexeme.concat @not_yet_accepted_chars
      @not_yet_accepted_chars = []
    end

    def save_checkpoint(pattern) #:nodoc:
      @last_saved_checkpoints[pattern] = (@current_lexeme.characters + @not_yet_accepted_chars)
    end

    # Accepts only up to the checkpoint saved for +pattern+; characters
    # beyond it are pushed back to be re-scanned.
    def accept_last_saved_checkpoint(pattern) #:nodoc:
      @current_lexeme.pattern = pattern
      @current_lexeme.concat @not_yet_accepted_chars
      @not_yet_accepted_chars = @current_lexeme.characters[(@last_saved_checkpoints[pattern].size)..-1]
      @current_lexeme.characters = @last_saved_checkpoints[pattern].dup
    end

    private

    # Rewinds past any pending characters and starts a fresh lexeme.
    def reset_and_rewind
      @input_position -= @not_yet_accepted_chars.size
      @current_lexeme = Lexeme.new(@input_position)
      @curr_state = @lexer.start_state
      @not_yet_accepted_chars = []
    end

    # Current input character; "\0" once past the end of input.
    # NOTE(review): relies on Ruby 1.8 String#[] returning a byte — verify
    # against the Ruby version this vendored code targets.
    def curr_char
      (@input[@input_position] || 0).chr
    end

    def advance
      @input_position += 1
    end

    # Evaluates the specification action for the accepted pattern.
    def get_token
      instance_eval(&@lexer.action_for_pattern(@current_lexeme.pattern))
    end
  end
end
|
@@ -0,0 +1,392 @@
|
|
1
|
+
module Dhaka
  module LexerSupport #:nodoc:all
    # Grammar for the regular-expression dialect understood by the lexer
    # generator. Each production's action builds a node of the regex AST.
    class RegexGrammar < Dhaka::Grammar
      # Root: a plain regex, or a regex with a trailing lookahead ("/").
      for_symbol(Dhaka::START_SYMBOL_NAME) do
        regex %w| Disjunction | do Dhaka::LexerSupport::RootNode.new(child_nodes[0], Dhaka::LexerSupport::AcceptingNode.new) end
        regex_with_lookahead %w| Disjunction / Disjunction | do Dhaka::LexerSupport::RootNode.new(Dhaka::LexerSupport::LookaheadNode.new(child_nodes[0], child_nodes[2]), Dhaka::LexerSupport::LookaheadAcceptingNode.new) end
      end

      for_symbol('Disjunction') do
        disjunction %w| Alternative \| Disjunction | do Dhaka::LexerSupport::OrNode.new(child_nodes[0], child_nodes[2]) end
        alternative %w| Alternative | do child_nodes[0] end
      end

      for_symbol('Alternative') do
        concatenation %w| Alternative Term | do Dhaka::LexerSupport::CatNode.new(child_nodes[0], child_nodes[1]) end
        term %w| Term | do child_nodes[0] end
      end

      # Quantifiers.
      for_symbol('Term') do
        zero_or_more %w| Atom * | do Dhaka::LexerSupport::ZeroOrMoreNode.new(child_nodes[0]) end
        one_or_more %w| Atom + | do Dhaka::LexerSupport::OneOrMoreNode.new(child_nodes[0]) end
        zero_or_one %w| Atom ? | do Dhaka::LexerSupport::ZeroOrOneNode.new(child_nodes[0]) end
        atom %w| Atom | do child_nodes[0] end
      end

      # Groups, literals, wildcard, character sets and escaped classes.
      for_symbol('Atom') do
        group %w| ( Disjunction ) | do child_nodes[1] end
        char %w| Character | do Dhaka::LexerSupport::LeafNode.new(child_nodes[0]) end
        anything %w| . | do Dhaka::LexerSupport::OrNode.new(*(Dhaka::LexerSupport::ALL_CHARACTERS - ["\r", "\n"]).collect {|char| Dhaka::LexerSupport::LeafNode.new(char)}) end
        positive_set %w| [ SetContents ] | do OrNode.new(*child_nodes[1].collect{|char| Dhaka::LexerSupport::LeafNode.new(char)}) end
        negative_set %w| [ ^ SetContents ] | do Dhaka::LexerSupport::OrNode.new(*(Dhaka::LexerSupport::ALL_CHARACTERS - child_nodes[2]).collect {|char| Dhaka::LexerSupport::LeafNode.new(char)}) end

        # Escaped character classes (e.g. \d, \w) expand to alternations.
        Dhaka::LexerSupport::CLASSES.each do |char, expansion|
          send("character_class_#{char}", ['\\', char]) do
            Dhaka::LexerSupport::OrNode.new(*Dhaka::LexerSupport::CLASSES[char].collect {|c| Dhaka::LexerSupport::LeafNode.new(c)})
          end
        end

        # Escaped operator characters become plain literals.
        Dhaka::LexerSupport::OPERATOR_CHARACTERS.each do |char, method_name|
          send(method_name, ['\\', char]) do
            Dhaka::LexerSupport::LeafNode.new(char)
          end
        end
      end

      for_symbol('Character') do
        letter_character %w| Letter | do child_nodes[0] end
        digit_character %w| Digit | do child_nodes[0] end
        white_space_character %w| Whitespace | do child_nodes[0] end
        symbol_character %w| Symbol | do child_nodes[0] end
      end

      for_symbol('SetContents') do
        single_item %w| SetItem | do child_nodes[0] end
        multiple_items %w| SetContents SetItem | do child_nodes[0].concat child_nodes[1] end
      end

      # Set items: single characters or a-z / A-Z / 0-9 style ranges.
      for_symbol('SetItem') do
        single_char_item %w| SetCharacter | do [child_nodes[0]] end
        lower_case_letter_range %w| LowercaseLetter - LowercaseLetter | do (child_nodes[0]..child_nodes[2]).to_a end
        upper_case_letter_range %w| UppercaseLetter - UppercaseLetter | do (child_nodes[0]..child_nodes[2]).to_a end
        digit_range %w| Digit - Digit | do (child_nodes[0]..child_nodes[2]).to_a end
      end

      for_symbol('Letter') do
        lower_case_letter %w| LowercaseLetter | do child_nodes[0] end
        upper_case_letter %w| UppercaseLetter | do child_nodes[0] end
      end

      # One production per terminal character, generated from the
      # character tables defined elsewhere in LexerSupport.
      for_symbol('LowercaseLetter') do
        Dhaka::LexerSupport::LOWERCASE_LETTERS.each do |letter|
          send("lower_char_letter_#{letter}", letter) do
            letter
          end
        end
      end

      for_symbol('UppercaseLetter') do
        Dhaka::LexerSupport::UPPERCASE_LETTERS.each do |letter|
          send("upper_case_letter_#{letter}", letter) do
            letter
          end
        end
      end

      for_symbol('Digit') do
        Dhaka::LexerSupport::DIGITS.each do |digit|
          send("digit_#{digit}", digit) do
            digit
          end
        end
      end

      for_symbol('Whitespace') do
        Dhaka::LexerSupport::WHITESPACE.each do |whitespace_char|
          send("whitespace_#{to_byte(whitespace_char)}", whitespace_char) do
            whitespace_char
          end
        end
      end

      for_symbol('Symbol') do
        Dhaka::LexerSupport::SYMBOLS.each do |symbol_char|
          send("symbol_char_#{to_byte(symbol_char)}", symbol_char) do
            symbol_char
          end
        end
      end

      # Inside a set, operator characters must be escaped to be literal.
      for_symbol('SetCharacter') do
        (Dhaka::LexerSupport::ALL_CHARACTERS - Dhaka::LexerSupport::SET_OPERATOR_CHARACTERS).each do |char|
          send("set_character_#{to_byte(char)}", char) do
            char
          end
        end
        Dhaka::LexerSupport::SET_OPERATOR_CHARACTERS.each do |char|
          send("set_operator_character_#{to_byte(char)}", ['\\', char]) do
            char
          end
        end
      end
    end
  end
end
|
128
|
+
|
129
|
+
|
130
|
+
module Dhaka
  module LexerSupport
    # Base class for regex AST nodes. By default a node is neither a
    # lookahead checkpoint nor an accepting position.
    class ASTNode
      def checkpoint
        false
      end

      def accepting
        false
      end
    end

    # AST node with exactly two children.
    class BinaryNode < ASTNode
      attr_reader :left, :right

      def initialize left, right
        @left, @right = left, right
      end

      # Emits this subtree into a DOT graph.
      def to_dot(graph)
        graph.node(self, :label => label)
        graph.edge(self, left)
        graph.edge(self, right)
        left.to_dot(graph)
        right.to_dot(graph)
      end

      def calculate_follow_sets
        left.calculate_follow_sets
        right.calculate_follow_sets
      end
    end

    # Alternation over any number of children.
    class OrNode < ASTNode
      attr_reader :children

      def initialize(*children)
        @children = children
      end

      def label
        "|"
      end

      # Nullable when any branch is nullable.
      def nullable
        children.any? { |child| child.nullable }
      end

      # First-set: union over all branches.
      def first
        children.inject(Set.new) { |acc, child| acc.merge child.first }
      end

      # Last-set: union over all branches.
      def last
        children.inject(Set.new) { |acc, child| acc.merge child.last }
      end

      def to_dot(graph)
        graph.node(self, :label => label)
        children.each do |child|
          graph.edge(self, child)
          child.to_dot(graph)
        end
      end

      def calculate_follow_sets
        children.each { |child| child.calculate_follow_sets }
      end
    end

    # Concatenation of two subexpressions.
    class CatNode < BinaryNode
      def label
        "cat"
      end

      def nullable
        left.nullable && right.nullable
      end

      def first
        left.nullable ? (left.first | right.first) : left.first
      end

      def last
        right.nullable ? (left.last | right.last) : right.last
      end

      # Positions ending the left part can be followed by positions
      # starting the right part.
      def calculate_follow_sets
        super
        left.last.each do |leaf_node|
          leaf_node.follow_set.merge right.first
        end
      end
    end

    # Concatenation whose boundary is a lookahead ("/"): a checkpoint is
    # inserted between the matched part and the lookahead part.
    class LookaheadNode < CatNode
      def label
        "/"
      end

      def calculate_follow_sets
        super
        left.last.each do |leaf_node|
          leaf_node.follow_set.merge(Set.new([CheckpointNode.new]))
        end
      end
    end

    # AST node with a single child; delegates the follow-set queries.
    class UnaryNode < ASTNode
      attr_reader :child

      def initialize child
        @child = child
      end

      def to_dot(graph)
        graph.node(self, :label => label)
        graph.edge(self, child)
        child.to_dot(graph)
      end

      def nullable
        child.nullable
      end

      def first
        child.first
      end

      def last
        child.last
      end

      def calculate_follow_sets
        child.calculate_follow_sets
      end
    end

    # Root of a complete regex AST (expression concatenated with its
    # accepting node).
    class RootNode < CatNode
      def label
        "start"
      end

      def head_node?
        true
      end
    end

    # Kleene star: matches zero or more repetitions of its child.
    class ZeroOrMoreNode < UnaryNode
      def label
        "*"
      end

      def nullable
        true
      end

      # Repetition: last positions loop back to first positions.
      def calculate_follow_sets
        super
        last.each do |leaf_node|
          leaf_node.follow_set.merge first
        end
      end
    end

    # Optional: matches zero or one occurrence of its child.
    class ZeroOrOneNode < UnaryNode
      def label
        "?"
      end

      def nullable
        true
      end
    end

    # Matches one or more repetitions of its child.
    class OneOrMoreNode < UnaryNode
      def label
        "+"
      end

      def calculate_follow_sets
        super
        last.each do |leaf_node|
          leaf_node.follow_set.merge first
        end
      end
    end

    # A single literal character position in the regex.
    class LeafNode < ASTNode
      attr_reader :character, :follow_set

      def initialize character
        @character = character
        @follow_set = Set.new
      end

      def to_dot(graph)
        graph.node(self, :label => character)
      end

      def nullable
        false
      end

      def first
        Set.new([self])
      end

      def last
        Set.new([self])
      end

      def calculate_follow_sets
      end
    end

    # Marker position inserted at a lookahead boundary.
    class CheckpointNode < ASTNode
      def to_dot(graph)
        graph.node(self, :label => "lookahead")
      end

      def character
      end

      def checkpoint
        true
      end
    end

    # Terminal position marking a successful match.
    class AcceptingNode < ASTNode
      def accepting
        true
      end

      def character
      end

      def action(pattern)
        AcceptAction.new(pattern)
      end

      def first
        Set.new([self])
      end

      def calculate_follow_sets
      end

      def to_dot(graph)
        graph.node(self, :label => '#')
      end
    end

    # Accepting position for a regex with a lookahead suffix.
    class LookaheadAcceptingNode < AcceptingNode
      def action(pattern)
        LookaheadAcceptAction.new(pattern)
      end
    end
  end
end
|
390
|
+
|
391
|
+
end
|
392
|
+
end
|