RubyGems - rparsec-ruby19 - Versions diffs - 1.0 - Mend

rparsec-ruby19 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

data/rparsec.rb +3 -0
data/rparsec/context.rb +83 -0
data/rparsec/error.rb +28 -0
data/rparsec/expressions.rb +184 -0
data/rparsec/functors.rb +274 -0
data/rparsec/id_monad.rb +17 -0
data/rparsec/keywords.rb +114 -0
data/rparsec/locator.rb +40 -0
data/rparsec/misc.rb +130 -0
data/rparsec/monad.rb +62 -0
data/rparsec/operators.rb +103 -0
data/rparsec/parser.rb +894 -0
data/rparsec/parser_monad.rb +23 -0
data/rparsec/parsers.rb +623 -0
data/rparsec/token.rb +43 -0
data/test/src/expression_test.rb +124 -0
data/test/src/full_parser_test.rb +95 -0
data/test/src/functor_test.rb +66 -0
data/test/src/import.rb +5 -0
data/test/src/keyword_test.rb +28 -0
data/test/src/operator_test.rb +21 -0
data/test/src/parser_test.rb +53 -0
data/test/src/perf_benchmark.rb +25 -0
data/test/src/s_expression_test.rb +33 -0
data/test/src/scratch.rb +41 -0
data/test/src/simple_monad_test.rb +22 -0
data/test/src/simple_parser_test.rb +423 -0
data/test/src/sql.rb +268 -0
data/test/src/sql_parser.rb +258 -0
data/test/src/sql_test.rb +128 -0
data/test/src/tests.rb +13 -0
metadata +95 -0

data/rparsec/id_monad.rb ADDED

@@ -0,0 +1,17 @@
+module RParsec
+class IdMonad
+  def value v
+    v
+  end
+  def bind prev
+    yield prev
+  end
+  def mplus a, b
+    a
+  end
+end
+end # module

data/rparsec/keywords.rb ADDED

@@ -0,0 +1,114 @@
+require 'rparsec/parser'
+module RParsec
+#
+# This class helps building lexers and parsers for keywords.
+#
+class Keywords
+  extend Parsers
+  private_class_method :new
+  #
+  # The symbol used to identify a keyword token
+  #
+  attr_reader :keyword_symbol
+  #
+  # The lexer that parses all the keywords represented
+  #
+  attr_reader :lexer
+  #
+  # Do we lex case sensitively?
+  #
+  def case_sensitive?
+    @case_sensitive
+  end
+  #
+  # To create an instance that lexes the given keywords
+  # case sensitively.
+  # _default_lexer_ is used to lex a token first, the token text is then compared with
+  # the given keywords. If it matches any of the keyword, a keyword token is generated instead
+  # using _keyword_symbol_.
+  # The _block_ parameter, if present, is used to convert the token text to another object
+  # when the token is recognized during grammar parsing phase.
+  #
+  def self.case_sensitive(words, default_lexer=word.token(:word), keyword_symbol=:keyword, &block)
+    new(words, true, default_lexer, keyword_symbol, &block)
+  end
+  #
+  # To create an instance that lexes the given keywords
+  # case insensitively.
+  # _default_lexer_ is used to lex a token first, the token text is then compared with
+  # the given keywords. If it matches any of the keyword, a keyword token is generated instead
+  # using _keyword_symbol_.
+  # The _block_ parameter, if present, is used to convert the token text to another object
+  # when the token is recognized during parsing phase.
+  #
+  def self.case_insensitive(words, default_lexer=word.token(:word), keyword_symbol=:keyword, &block)
+    new(words, false, default_lexer, keyword_symbol, &block)
+  end
+  # scanner has to return a string
+  def initialize(words, case_sensitive, default_lexer, keyword_symbol, &block)
+    @default_lexer, @case_sensitive, @keyword_symbol = default_lexer, case_sensitive, keyword_symbol
+    # this guarantees that we have copy of the words array and all the word strings.
+    words = copy_words(words, case_sensitive)
+    @name_map = {}
+    @symbol_map = {}
+    word_map = {}
+    words.each do |w|
+      symbol = "#{keyword_symbol}:#{w}".to_sym
+      word_map[w] = symbol
+      parser = Parsers.token(symbol, &block)
+      @symbol_map["#{w}".to_sym] = parser
+      @name_map[w] = parser
+    end
+    @lexer = make_lexer(default_lexer, word_map)
+  end
+  #
+  # Get the parser that recognizes the token of the given keyword during the parsing phase.
+  #
+  def parser(key)
+    result = nil
+    if key.kind_of? String
+      name = canonical_name(key)
+      result = @name_map[name]
+    else
+      result = @symbol_map[key]
+    end
+    raise ArgumentError, "parser not found for #{key}" if result.nil?
+    result
+  end
+  alias [] parser
+  private
+  def make_lexer(default_lexer, word_map)
+    default_lexer.map do |tok|
+      text,ind = tok.text, tok.index
+      key = canonical_name(text)
+      my_symbol = word_map[key]
+      case when my_symbol.nil? then tok
+        else Token.new(my_symbol, text, ind) end
+    end
+  end
+  def canonical_name(name)
+    case when @case_sensitive then name else name.downcase end
+  end
+  def copy_words(words, case_sensitive)
+    words.map do |w|
+      case when case_sensitive then w.dup else w.downcase end
+    end
+  end
+end
+end # module

data/rparsec/locator.rb ADDED

@@ -0,0 +1,40 @@
+require 'rparsec/misc'
+module RParsec
+class CodeLocator
+  extend DefHelper
+  def_readable :code
+  LF = ?\n
+  def locate(ind)
+    return _locateEof if ind >= code.length
+    line, col = 1,1
+    return line,col if ind<=0
+    for i in (0...ind)
+      c = code[i]
+      if c == LF
+        line, col = line+1, 1
+      else
+        col = col+1
+      end
+    end
+    return line, col
+  end
+  def _locateEof
+    line, col = 1, 1
+    code.each_byte do |c|
+      if c == LF
+        line, col = line+1, 1
+      else
+        col = col+1
+      end
+    end
+    return line, col
+  end
+end
+end # module

data/rparsec/misc.rb ADDED

@@ -0,0 +1,130 @@
+module RParsec
+#
+# Internal utility functions for string manipulations.
+#
+module StringUtils
+  #
+  # Does _str_ starts with the _sub_ string?
+  #
+  def self.starts_with? str, sub
+    return true if sub.nil?
+    len = sub.length
+    return false if len > str.length
+    for i in (0...len)
+      return false if str[i] != sub[i]
+    end
+    true
+  end
+end
+#
+# Helpers for defining ctor.
+#
+module DefHelper
+  def def_ctor(*vars)
+    define_method(:initialize) do |*params|
+      vars.each_with_index do |var, i|
+        instance_variable_set("@"+var.to_s, params[i])
+      end
+    end
+  end
+  def def_readable(*vars)
+    attr_reader(*vars)
+    def_ctor(*vars)
+  end
+  def def_mutable(*vars)
+    attr_accessor(*vars)
+    def_ctor(*vars)
+  end
+end
+#
+# To type check method parameters.
+#
+module TypeChecker
+  private
+  def nth n
+    th = case n when 0 then 'st' when 1 then 'nd' else 'th' end
+    "#{n+1}#{th}"
+  end
+  public
+  def check_arg_type expected, obj, mtd, n=0
+    unless obj.kind_of? expected
+      raise ArgumentError,
+        "#{obj.class} assigned to #{expected} for the #{nth n} argument of #{mtd}."
+    end
+  end
+  def check_arg_array_type elem_type, arg, mtd, n=0
+    check_arg_type Array, arg, mtd, n
+    arg.each_with_index do |x, i|
+      unless x.kind_of? elem_type
+        raise ArgumentError,
+          "#{x.class} assigned to #{elem_type} for the #{nth i} element of the #{nth n} argument of #{mtd}."
+      end
+    end
+  end
+  def check_vararg_type expected, args, mtd, n = 0
+    (n...args.length).each do |i|
+      check_arg_type expected, args[i], mtd, i
+    end
+  end
+  extend self
+end
+#
+# To add declarative signature support.
+# def_sig wraps an already defined instance method so that the types of its
+# arguments are checked through TypeChecker on every call.
+#
+module Signature
+  # Signatures = {}
+  #
+  # Declare the parameter types of the instance method _sym_.
+  # Each entry of _types_ has to be either a Class or an Array holding at most
+  # one Class; anything else makes TypeChecker raise ArgumentError.
+  # When the method is later called:
+  # * a Class entry requires the argument at that position to be of that kind;
+  # * an empty Array entry requires the argument at that position to be an Array;
+  # * a one-element Array entry requires every argument from that position on
+  #   to be of the kind given by its element; entries after it are not consulted.
+  #
+  def def_sig sym, *types
+    types.each_with_index do |t,i|
+      unless t.kind_of? Class
+        # raises when t is neither a Class nor an Array.
+        TypeChecker.check_arg_type Class, t, :def_sig, i unless t.kind_of? Array
+        # raises when the Array has more than one element.
+        TypeChecker.check_arg_type Class, t, :def_sig, i unless t.length <= 1
+        # raises when an element of the Array is not a Class.
+        TypeChecker.check_arg_array_type Class, t, :def_sig, i
+      end
+    end
+    # Signatures[sym] = types
+    __intercept_method_to_check_param_types__(sym, types)
+  end
+  private
+  #
+  # Replace the instance method _sym_ by a version that checks its arguments
+  # against _types_ before running the original implementation.
+  #
+  def __intercept_method_to_check_param_types__(sym, types)
+    # capture the original implementation before _sym_ is redefined below.
+    mtd = instance_method(sym)
+    helper = "_#{sym}_param_types_checked_helper".to_sym
+    # the helper checks the arguments and returns the original method bound to the receiver.
+    define_method(helper) do |*params|
+      star_type, star_ind = nil, nil
+      types.each_with_index do |t, i|
+        # NOTE(review): star_type is only assigned right before the break below,
+        # so this substitution looks like it can never take effect - confirm.
+        t = star_type unless star_type.nil?
+        arg = params[i]
+        if t.kind_of? Class
+          TypeChecker.check_arg_type t, arg, sym, i
+        elsif t.empty?
+          TypeChecker.check_arg_type Array, arg, sym, i
+        else
+          # a one-element Array: remember its type and where the remaining arguments start.
+          star_type, star_ind = t[0], i
+          break
+        end
+      end
+      TypeChecker.check_vararg_type star_type, params, sym, star_ind unless star_ind.nil?
+      mtd.bind(self)
+    end
+    # redefine _sym_ so that it goes through the helper and then calls the original,
+    # forwarding the block as well.
+    module_eval """
+    def #{sym}(*params, &block)
+      #{helper}(*params).call(*params, &block)
+    end
+    """
+  end
+end
+end # module

data/rparsec/monad.rb ADDED

@@ -0,0 +1,62 @@
+module RParsec
+#
+# module for Monad
+#
+module Monad
+  attr_reader :this
+  #
+  # To initialize with a monad implementation and an object that obeys the monad law.
+  #
+  def initMonad(m, v)
+    raise ArgumentError, 'monad cannot be nil' if m.nil?
+    @monad = m;
+    @this = v;
+  end
+  #
+  # To create a value based on the monad impl.
+  #
+  def value v
+    @monad.value v
+  end
+  #
+  # Run the _bind_ operation on the encapsulated object following the monad law.
+  #
+  def bind(&binder)
+    @monad.bind(@this, &binder)
+  end
+  #
+  # Run the _seq_ operation on the encapsulated object following the monad law.
+  # If _seq_ is not defined by the monad impl, use _bind_ to implement.
+  #
+  def seq(other)
+    if @monad.respond_to? :seq
+      @monad.seq(other)
+    else bind {|x|other}
+    end
+  end
+  #
+  # Run the _map_ operation on the encapsulated object following the monad law.
+  # _bind_ is used to implement.
+  #
+  def map(&mapper)
+    bind do |v|
+      result = mapper.call v;
+      value(result);
+    end
+  end
+  #
+  # Run the _plus_ operation on the encapsulated object following the MonadPlus law.
+  #
+  def plus other
+    @monad.mplus(@this, other.this)
+  end
+end
+end # module

data/rparsec/operators.rb ADDED

@@ -0,0 +1,103 @@
+require 'rparsec/parser'
+module RParsec
+#
+# This class helps building lexer and parser for operators.
+# The case that one operator (++ for example) contains another operator (+)
+# is automatically handled so client code don't have to worry about ambiguity.
+#
+class Operators
+  #
+  # To create an instance of Operators for the given operators.
+  # The _block_ parameter, if present, is used to convert the token text to another object
+  # when the token is recognized during grammar parsing phase.
+  #
+  def initialize(ops, &block)
+    @lexers = {}
+    @parsers = {}
+    sorted = Operators.sort(ops)
+    lexers = sorted.map do |op|
+      symbol = op.to_sym
+      result = nil
+      if op.length == 1
+        result = Parsers.char(op)
+      else
+        result = Parsers.str(op)
+      end
+      result = result.token(symbol)
+      @lexers[symbol] = result
+      @parsers[symbol] = Parsers.token(symbol, &block)
+      result
+    end
+    @lexer = Parsers.sum(*lexers)
+  end
+  #
+  # Get the parser for the given operator.
+  #
+  def parser(op)
+    result = @parsers[op.to_sym]
+    raise ArgumentError, "parser not found for #{op}" if result.nil?
+    result
+  end
+  alias [] parser
+  #
+  # Get the lexer that lexes operators.
+  # If an operator is specified, the lexer for that operator is returned.
+  #
+  def lexer(op=nil)
+    return @lexer if op.nil?
+    @lexers[op.to_sym]
+  end
+  #
+  # Sort an array of operators so that contained operator appears after containers.
+  # When no containment exist between two operators, the shorter one takes precedence.
+  #
+  def self.sort(ops)
+    #sort the array by longer-string-first.
+    ordered = ops.sort {|x, y|y.length <=> x.length}
+    suites = []
+    # loop from the longer to shorter string
+    ordered.each do |s|
+      populate_suites(suites, s)
+    end
+    # suites are populated with bigger suite first
+    to_array suites
+  end
+  private
+  def self.populate_suites(suites, s)
+    # populate the suites so that bigger suite first
+    # this way we can use << operator for non-contained strings.
+    # we need to start from bigger suite. So loop in reverse order
+    for suite in suites
+      return if populate_suite(suite, s)
+    end
+    suites << [s]
+  end
+  def self.populate_suite(suite, s)
+    # loop from the tail of the suite
+    for i in (1..suite.length)
+      ind = suite.length - i
+      cur = suite[ind]
+      if StringUtils.starts_with? cur, s
+        suite.insert(ind+1, s) unless cur == s
+        return true
+      end
+    end
+    false
+  end
+  def self.to_array suites
+    suites.reverse!.flatten!
+  end
+end
+end # module