RubyGems - carray-dataframe - Versions diffs - 1.0.0 → 1.1.1 - Mend

carray-dataframe 1.0.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

checksums.yaml +5 -5
data/API.txt +1 -7
data/README.md +3 -1
data/Rakefile +11 -0
data/carray-dataframe.gemspec +8 -6
data/lib/carray-dataframe.rb +13 -0
data/lib/carray-dataframe/arranger.rb +209 -0
data/lib/carray-dataframe/cadf_array.rb +106 -0
data/lib/carray-dataframe/converter.rb +97 -0
data/lib/carray-dataframe/dataframe.rb +1279 -0
data/lib/carray-dataframe/group.rb +199 -0
data/lib/carray-dataframe/iloc_accessor.rb +62 -0
data/lib/carray-dataframe/io.rb +96 -0
data/lib/carray-dataframe/join.rb +283 -0
data/lib/carray-dataframe/loc_accessor.rb +145 -0
data/lib/carray-dataframe/pivot.rb +54 -0
data/lib/carray-dataframe/reference.rb +142 -0
data/lib/carray-dataframe/to_html.rb +102 -0
metadata +23 -28
data/examples/R/fit.rb +0 -24
data/examples/R/iris.rb +0 -9
data/examples/R/japan_area.rb +0 -30
data/examples/R/kyaku.rb +0 -22
data/examples/group_by.rb +0 -78
data/examples/hist.rb +0 -27
data/examples/iris.rb +0 -29
data/examples/map.rb +0 -23
data/examples/match.rb +0 -21
data/examples/test.xlsx +0 -0
data/examples/test1.rb +0 -44
data/examples/test2.rb +0 -14
data/examples/test3.db +0 -0
data/examples/test3.rb +0 -11
data/examples/test3.xlsx +0 -0
data/examples/to_excel.rb +0 -27
data/lib/R.rb +0 -365
data/lib/carray/autoload/autoload_dataframe_dataframe.rb +0 -26
data/lib/carray/dataframe/dataframe.rb +0 -1640

data/examples/R/fit.rb DELETED

@@ -1,24 +0,0 @@
-require "carray"
-require "R"
-R.run
-x = CArray.float(200).span(0..4r)
-v = x.random(4)-2
-a = 3
-b = 5
-c = 7
-y = a*x**2 + b*x + c + v
-res = R %{
-  nls(y ~ a*x^2 + b*x + c, start=c(a=100,b=1,c=1), trace=TRUE)
-}, :x=>x, :y=>y
-a1,b1,c1 = R.coef(res).to_ruby.values_at("a","b","c")
-CA.gnuplot {
-  plot [x,y],
-       [x,a1*x**2+b1*x+c, nil, "lines"]
-}

data/examples/R/iris.rb DELETED

@@ -1,9 +0,0 @@
-require "R"
-R.run
-iris = R.iris
-CA.gnuplot {
-  plot [iris.Sepal_Length, iris.Sepal_Width]
-}

data/examples/R/japan_area.rb DELETED

@@ -1,30 +0,0 @@
-#
-# From https://oku.edu.mie-u.ac.jp/~okumura/stat/100410a.html
-#
-require "R"
-R.run
-areaname = ["北海道","本州","四国","九州","沖縄"].to_ca
-areasize = [83457,231113,18792,42191,2276].to_ca / 10000.0
-R %{
-  par(family="HiraKakuProN-W3")
-  par(las=1)
-  par(mgp=c(2,0.8,0))
-  barplot(areasize, names.arg=areaname)
-  axis(2, labels="面積 (万km^2)", at=20, hadj=0.3, padj=-1, tick=FALSE)
-}, :areasize=>areasize, :areaname=>areaname
-gets
-R {
-  par :family=>"HiraKakuProN-W3"
-  par :las=>1
-  par :mgp=>[2,0.8,0]
-  barplot areasize, "names.arg"=>areaname
-  axis 2, :labels=>"面積 (万km^2)", :at=>20, :hadj=>0.3, :padj=>-1, :tick=>false
-}
-gets

data/examples/R/kyaku.rb DELETED

@@ -1,22 +0,0 @@
-require "carray"
-CA.gnuplot {
-  terminal %{ wxt }
-  (1..10).each do |n|
-    x = CArray.double(1000000) {0}
-    n.times do
-      x += CArray.double(1000000).random
-    end
-    x = x/n
-    df = CADataFrame.new(:x=>x)
-    h = df.histogram(:x, CA_DOUBLE(0..1,0.01))
-    plot [h.x, h.count, nil, "boxes fill solid 0.5 noborder"],
-         :x=>[nil, 0..1],
-         :title=>n.to_s,
-         :nopause=>true
-    sleep 0.5
-  end
-  gets
-}

data/examples/group_by.rb DELETED

@@ -1,78 +0,0 @@
-require "carray"
-csv =<<HERE
-name,v1,v2
-A,1,3
-B,3,2
-C,2,1
-B,1,3
-C,1,4
-A,4,2
-B,5,3
-C,3,3
-C,1,1
-C,6,3
-C,8,1
-A,1,2
-HERE
-f = CADataFrame.from_csv(csv) {
-  header
-  body
-}.arrange {
-  int :v1, :v2
-}
-p f.resample { |l, c|
-  c = c.reshape(false,2)
-  case l
-  when "name"
-    c[nil,-1]
-  else
-    c.max(1)
-  end
-}
-p df = CADataFrame.concat(f.calculate(:sum),
-                          f.calculate(:mean)).arrange {
-  eliminate :name
-  append :sum, v1 + v2
-}
-p f.group_by(:name).table {
-  {
-    :count  => row_number,
-    :v1_sum => v1.sum,
-    :v1_mean => v1.mean,
-    :v2_sum => v2.sum,
-    :v2_mean => v2.mean,
-  }
-}
-p f.group_by(:v2).table {
-  {
-    :count    => row_number,
-    :namelist => name.sort.join(""),
-  }
-}
-p "--- Pivot"
-p f.pivot({:v1=>CA_INT(1..8)},{:v2=>CA_INT(1..5)}).table {
-  name.size > 0 ? name.join("") : "-"
-}
-g = f.group_by(:v1,:v2)
-p t = g.table {
-  {
-    :count    => row_number,
-    :namelist => name.sort.join(""),
-  }
-}
-p g[[1,3]]
-p t.select { count >= 2 }

data/examples/hist.rb DELETED

@@ -1,27 +0,0 @@
-require "carray"
-text = <<EOS
-name,NAME,a,b,c
-u,U,1,2,3
-v,V,2,3,4
-w,W,5,1,3
-x,X,4,3,1
-y,Y,1,1,2
-z,Z,2,3,1
-EOS
-df = CADataFrame.from_csv(text) {
-  header
-  body
-}.arrange {
-  int :a,:b,:c
-}
-CA.gnuplot {
-  set %{ style histogram rowstacked }
-  set %{ style fill solid border -1 }
-  plot [df.a, "a", "histogram"],
-       [df.b, "b", "histogram"],
-       [df.c, "c", "histogram"],
-       :x=>["NAME",nil,nil, histogram_tics(df.NAME)],
-       :y=>["VALUE",0..20]
-}

data/examples/iris.rb DELETED

@@ -1,29 +0,0 @@
-require "carray"
-require "R"
-require_relative "../lib/carray/dataframe/dataframe"
-R.run
-df = R.iris
-df.lead "id", df.row_index
-p df
-df.to_xlsx('out.xlsx')
-petal = df[["id", "Petal.Length","Petal.Width"]]
-sepal = df[["id", "Sepal.Length","Sepal.Width"]]
-species = df[["id", "Species"]]
-p df["Species"].value_counts
-#p d2 = df.to_daru
-tbl = df.to_sql("iris").to_df %{
-  select * from iris order by Sepal_Width desc;
-}
-p tbl
-#tbl.to_xlsx("out.xlsx")

data/examples/map.rb DELETED

@@ -1,23 +0,0 @@
-require "carray"
-csv =<<CSV
-id,gender,ans1,ans2
-1,F,1,0
-2,F,0,0
-3,M,1,0
-4,M,0,1
-5,F,1,1
-CSV
-df = CADataFrame.from_csv(csv) {
-  header
-  body
-}.arrange {
-  int :id,:ans1,:ans2
-  append :ans1s, ["NG","OK"].to_ca[ans1]
-  append :ans2s, ["NG","OK"].values_at(*ans2.to_a)
-}
-p df
-#p df.group_by(:gender).calculate(:sum)

data/examples/match.rb DELETED

@@ -1,21 +0,0 @@
-require "carray"
-df = CADataFrame.new a: [1,2,3,5,6,7,9,10],
-                     b: [30,20,20,30,20,10,20,30],
-                     c: [2,1,1,1,2,1,2,2]
-a1 = CA_INT([1,2,5,6,7,9])
-a2 = CA_INT([1,2,3,4,5,6,7,8,10,11])
-p df
-p df.matchup(:a, a1)
-df2 = df.matchup(:a, a2)
-df2.arrange {
-  unmask -9999, :b, :c
-}
-p df2
-b1 = CA_INT([10,20,30])
-p df.matchup(:b, b1)

data/examples/test.xlsx DELETED

Binary file

data/examples/test1.rb DELETED

@@ -1,44 +0,0 @@
-require "carray"
-require_relative "../lib/carray/dataframe/dataframe"
-hash =   { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
-p df = CADataFrame.new(hash)
-table = CA_OBJECT([[4,5,6,7], [10,20,30,40], [100,50,-30,-50]]).t
-p df = CADataFrame.new(table, column_names: ['aaa','bbb','ccc'])
-table.extend(CA::TableMethods)
-table.column_names = ['aaa','bbb','ccc']
-p df = CADataFrame.new(table)
-hash =   { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
-p df = CADataFrame.new(hash, row_index: ["a","b","c","d"])
-df.each_row(with: Array) {|row|
-  p row
-}
-df.each_row_with_row_index(with: Array) {|row,i|
-  p [row,i]
-}
-p df.to_ca.column_names
-p df.to_hash
-p df.columns_to_hash("bbb","aaa")
-p df.columns_to_hash("bbb",["aaa"])
-p df.columns_to_hash("bbb",["aaa","ccc"])
-p df.add_suffix("_no")
-p df.transpose
-p df2 = df.to_df
-p df["aaa"]
-p b = df[["aaa"]]
-#b.detouch!
-b[0,0] = -1111
-p df
-p df2

data/examples/test2.rb DELETED

@@ -1,14 +0,0 @@
-# coding: utf-8
-require "carray"
-require_relative "../lib/carray/dataframe/dataframe"
-hash =   { '温度' => [4,5,6,7], '湿度' => [10,20,30,40], '気圧差' => [100,50,-30,-50] }
-df = CADataFrame.new(hash) {
-  double :温度, :湿度
-}
-p df
-p df.温度
-p df.湿度

data/examples/test3.db DELETED

Binary file

data/examples/test3.rb DELETED

@@ -1,11 +0,0 @@
-require "carray"
-require_relative "../lib/carray/dataframe/dataframe"
-hash =   { '番号' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
-p df = CADataFrame.new(hash)
-df.select{ (番号 % 2).eq(1) }[] = UNDEF
-df.to_xlsx("test3.xlsx", with_row_index: true)
-df.to_df.unmask(-999).to_sqlite3(database: "test3.db", table: "test")

data/examples/test3.xlsx DELETED

Binary file

data/examples/to_excel.rb DELETED

@@ -1,27 +0,0 @@
-require "carray"
-df = CADataFrame.new :a=>[1,2,3,4,5,6,7,8,9,10]
-df.arrange {
-  append :b, a**2
-  append :c, a.template(:object).seq("A", :succ)
-}
-df.to_xlsx("test.xlsx") {|sheet|
-  sheet.add_chart(Axlsx::ScatterChart, :start_at => [5,2], :end_at => [10, 15], :title => "Chart") do |chart|
-    chart.add_series :xData => sheet["A2:A11"],
-                     :yData => sheet["B2:B11"],
-                     :labels => sheet["C2:C11"],
-                     :title => 'bob'
-  end
-}
-__END__
-xl = Axlsx::Package.new
-xl.use_shared_strings = true
-sheet = xl.workbook.add_worksheet(name: 'Example')
-sheet.add_row(df.column_names)
-df.each_row do |list|
-  sheet.add_row(list)
-end
-  endxl.serialize("test.xlsx")

data/lib/R.rb DELETED

@@ -1,365 +0,0 @@
-require "rsruby"
-require "rsruby/erobj"
-require "carray"
-class RSRuby
-  #Converts a String representing a 'Ruby-style' R function name into a
-  #String with the real R name according to the rules given in the manual.
-  def RSRuby.convert_method_name (name)
-    if name.length > 1 and name[-1].chr == '_' and name[-2].chr != '_'
-      name = name[0..-2]
-    end
-    name = name.gsub(/__/,'<-')
-    name = name.gsub(/_/, '.')
-    return name
-  end
-end
-def R (expr = nil, hash = {}, &block)
-  if block
-    if expr
-      raise "don't give both of block and expresion"
-    else
-      R.instance_exec(&block)
-    end
-  else
-    return R.call(expr, hash)
-  end
-end
-def R! (expr = nil, hash = {}, &block)
-  if block
-    if expr
-      raise "don't give both of block and expresion"
-    else
-      R.instance_exec(&block)
-    end
-	elsif expr.is_a?(Hash)
-		expr.each do |name, value|
-			R.instance.assign name.to_s, value
-		end
-  else
-    return R.exec(expr, hash)
-  end
-end
-module R
-  class Receiver < ::ERObj
-    def initialize (klass, x)
-      @classname  = klass
-      @attributes = R(%{ attributes(obj) },:obj=>x)
-      super(x)
-    end
-    attr_reader :attributes, :robj
-    def [] (sym = nil)
-      if sym
-        name = sym.to_s
-        name = name.gsub(/_/, '.')
-        begin
-          ret = @r['$'].call(@robj, name)
-        rescue RException
-          ret = @attributes[name]
-        end
-        return __converter__(ret)
-      else
-        return __converter__(to_ruby)
-      end
-    end
-    def __converter__ (arg)
-      case arg
-      when Array
-        return arg.to_ca.map!{|v| __converter__(v) }
-      when Hash
-        new_hash = {}
-        arg.each do |k,v|
-          new_hash[k] = __converter__(v)
-        end
-        return new_hash
-      when RObj
-        return R::CONVERTER[arg]
-      else
-        return arg
-      end
-    end
-    def method
-      return self[:method]
-    end
-    def inspect
-      return "<R:Receiver: class=#{@classname} \n" \
-             "             attributes=#{@attributes.inspect} \n" \
-             "             data=#{to_ruby} >"
-    end
-    def method_missing (sym, *args)
-      if args.empty?
-        return self[sym]
-      else
-        super
-      end
-    end
-    def to_ary
-      return [self.to_s]
-    end
-  end
-  CONVERSION_TABLE = {
-    "data.frame" => lambda{ |x| CADataFrame.from_R_data_frame(x) },
-    "ts" => lambda{ |x| R::TimeSeries.new(x) },
-  }
-  CONVERTER = lambda{|x|
-    case x
-    when RObj
-      klass = @r.eval_R("class").call(x)
-      if CONVERSION_TABLE.has_key?(klass)
-        CONVERSION_TABLE[klass][x]
-      else
-        case val = x.to_ruby
-        when Numeric, String
-          val
-        when Hash, NilClass
-          Receiver.new(klass, x)
-        when Array
-          val = val.to_ca
-          case klass
-          when "character", "factor"
-            val = val.maskout!(R.NA_character_)
-          when "integer"
-            val = val.maskout!(R.NA_integer_).int32
-          when "numeric"
-            val = val.maskout!(R.NA_real_).double
-          end
-          val
-        else
-          val
-        end
-      end
-    else
-      x
-    end
-  }
-  def self.run
-    if @r
-      return nil
-    end
-    ENV["LANG"]   = "en_US.UTF-8"
-    ENV["LC_ALL"] = "en_US.UTF-8"
-    @r = RSRuby.instance
-    RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
-    @r.class_table['data.frame'] = lambda{|x| ERObj.new(x) }
-    @r.class_table['matrix'] = lambda{|x| ERObj.new(x) }
-    @r.proc_table[lambda{|x| true }] = CONVERTER
-    @NA_integer_   = R %{ NA_integer_ }
-    @NA_real_      = R %{ NA_real_ }
-    @NA_character_ = R %{ NA_character_ }
-    ObjectSpace.define_finalizer(self, proc{ @r.shutdown })
-    return nil
-  end
-  def self.stop
-    @r.shutdown
-    @r = nil
-  end
-  class << self
-    attr_reader :NA_integer_, :NA_real_, :NA_character_
-  end
-  def self.instance
-    return @r
-  end
-  def self.exec (expr, hash = {})
-    hash.each do |name, value|
-      @r.assign(name.to_s, __converter__(value))
-    end
-    return @r.eval_R(expr)
-  end
-  def self.call (expr, hash = {})
-    names = ["DU33Y"]
-    args  = [0]
-    hash.each do |name, value|
-      names.push(name.to_s)
-      args.push(__converter__(value))
-    end
-    expr = "function (#{names.join(",")}) {" + expr + "}"
-    return @r.eval_R(expr).call(*args)
-  end
-  def self.__converter__ (arg)
-    case arg
-    when Symbol
-      return arg.to_s
-    when CArray
-      return __converter__(arg.as_r)
-    when CADataFrame
-      return arg.as_r
-    when Array
-      return arg.map{|v| __converter__(v) }
-    when Hash
-      new_hash = {}
-      arg.each do |k,v|
-        new_hash[k] = __converter__(v)
-      end
-      return new_hash
-    else
-      return arg
-    end
-  end
-  def self.method_missing (sym, *args)
-    if args.empty? and sym.to_s[-1] == "!"
-      return @r.send(sym.to_s[0..-2].intern).call()
-    elsif args.size == 1 and sym.to_s[-1] == "="
-      return @r.assign(sym.to_s[0..-2], __converter__(args[0]))
-    else
-      return @r.send(sym, *args.map{|v| __converter__(v)})
-    end
-  end
-end
-class CArray
-  def guess_column_type_for_R
-    if is_a?(CArray)
-      if integer?
-        "integer"
-      elsif float?
-        "numeric"
-      elsif object?
-        notmasked = self[:is_not_masked].to_ca
-        if notmasked.convert(:boolean){|v| v.is_a?(Integer) }.all_equal?(1)
-          "integer"
-        elsif notmasked.convert(:boolean){|v| v.is_a?(Numeric) }.all_equal?(1)
-          "numeric"
-        elsif notmasked.convert(:boolean){|v| v.is_a?(String) }.all_equal?(1)
-          "character"
-        else
-          "unknown"
-        end
-      end
-    else
-      raise "invalid column name"
-    end
-  end
-  def as_r
-    if has_mask?
-      case guess_column_type_for_R
-      when "integer"
-        out = unmask_copy(R.NA_integer_)
-      when "numeric"
-        out = unmask_copy(R.NA_real_)
-      else
-        out = unmask_copy(R.NA_character_)
-      end
-		else
-			out = self
-    end
-	  if rank == 1
-      return out.to_a
-    elsif rank == 2
-			begin
-		    mode = RSRuby.get_default_mode
-	      RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
-	      return R.matrix(out.flatten.to_a, :nrow=>dim0)
-	    ensure
-	      RSRuby.set_default_mode(mode)
-	    end
-		else
-			return out.to_a
-		end
-  end
-end
-class CADataFrame
-  def self.from_R_data_frame (obj)
-    r = R.instance
-    RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
-    r.proc_table[lambda{|x| true }] = R::CONVERTER
-    dataframe = obj
-    column_names = r.colnames(obj).to_a
-    column_names = [column_names].flatten
-    row_names = r.attr(obj, 'row.names')
-    columns = {}
-    column_names.each do |name|
-      value = r['$'].call(obj, name.to_s)
-      case value
-      when CArray
-        columns[name] = value
-      when Array
-        columns[name] = value.to_ca
-      else
-        columns[name] = [value].to_ca
-      end
-    end
-    column_names.each do |name|
-      column = columns[name]
-      column.maskout!(nil)
-    end
-    return CADataFrame.new(columns, row_index: row_names ? row_names.to_ca : nil)
-  end
-  def as_r
-    r = R.instance
-    new_columns = {}
-    @column_names.each do |name|
-      column = @columns[name]
-      if column.has_mask?
-        case column.guess_column_type_for_R
-        when "integer"
-          column = column.unmask_copy(R.NA_integer_)
-        when "numeric"
-          column = column.unmask_copy(R.NA_real_)
-        else
-          column = column.unmask_copy(R.NA_character_)
-        end
-      end
-      new_columns[name] = R.__converter__(column.to_a)
-    end
-    mode = RSRuby.get_default_mode
-     RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
-    return r.as_data_frame(:x => new_columns)
-  ensure
-    RSRuby.set_default_mode(mode)
-  end
-end
-class R::TimeSeries < ERObj
-  def start
-    return R.start(self)
-  end
-  def end
-    return R.end(self)
-  end
-  def frequency
-    return R.frequency(self)
-  end
-  def length
-    return R.length(self)
-  end
-end