carray-dataframe 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7f93c348e3fd8e166ddba89d5ddc1f0fb86653a6
4
+ data.tar.gz: cb16a824a9e0c2aaf40db3f26328176ef9eb882e
5
+ SHA512:
6
+ metadata.gz: 8f96c5cf91470a732a09aa1e3629b94963d29d786dbfa9768430c18e0ebe1b2219f7256ddc17c783ce8cac9253c19c4cbce9e16d005435f0f718f97c788b832d
7
+ data.tar.gz: b982e8a4b8f162f69bb86fd31d0d92d1d86c7c06b663b9cc091974fe46fcdbf16635e3e793cc91b813fd54cff29e7135416c449ba0089985332dad354d2a18a9
data/API.txt ADDED
@@ -0,0 +1,83 @@
1
+
2
+ ### Constructor
3
+
4
+ CADataFrame.new(columns_or_table, row_index: nil, column_names: nil)
5
+ CADataFrame.new(columns_or_table, row_index: nil, column_names: nil) { ... }
6
+
7
+ If a block is given, arrange is called internally with the block.
8
+
9
+ ### Attributes
10
+
11
+ CADataFrame#column_number
12
+ CADataFrame#column_names
13
+ CADataFrame#column_types
14
+ CADataFrame#columns
15
+ CADataFrame#row_index
16
+ CADataFrame#row_number
17
+
18
+ ### Index Access
19
+
20
+ df[["AAA"]] => CADataFrame including column "AAA"
21
+ df[["AAA","BBB"]] => CADataFrame including columns "AAA", "BBB"
22
+
23
+ df["AAA"] => 1 dimensional CArray
24
+ df[["AAA"]].to_ca => 2 dimensional CArray with column_names
25
+ df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
26
+
27
+
28
+ df[dfmask]
29
+ returns a new detached CADataFrame masked where dfmask's value equals 1
30
+
31
+
32
+ ### Iterators
33
+
34
+ CADataFrame#each_column_name { |name| ... }
35
+ CADataFrame#each_column { |name, column| ... }
36
+ CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
37
+ CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
38
+
39
+ ### Transformation
40
+
41
+ CADataFrame#add_suffix(suffix_string) -> CADataFrame
42
+ Add suffix_string to all column names
43
+
44
+ CADataFrame#transpose(column_names: )
45
+
46
+
47
+ ### Conversion
48
+
49
+ CADataFrame#ca -> CADFArray (Reference Array)
50
+ CADataFrame#to_ca -> CArray with CA::TableMethods
51
+ CADataFrame#to_hash -> Hash
52
+ CADataFrame#to_xlsx(with_row_index: false) -> Hash
53
+ Masked elements are converted to "=NA()"
54
+
55
+ CADataFrame#columns_to_hash(key_name, *value_names)
56
+
57
+ ex) df.columns_to_hash("bbb",["aaa","ccc"])
58
+
59
+ ---------------
60
+ aaa bbb ccc
61
+ ---------------
62
+ 4 10 100
63
+ 5 20 50
64
+ 6 30 -30
65
+ 7 40 -50
66
+ ---------------
67
+
68
+ {10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
69
+
70
+
71
+
72
+
73
+ CADataFrame
74
+
75
+ #append(name) { INSTANCE_CONTEXT } <- any carray
76
+ #lead(name) { INSTANCE_CONTEXT } <- any carray
77
+ #execute { INSTANCE_CONTEXT } => any object
78
+ #select(name...) { INSTANCE_CONTEXT } <- boolean carray
79
+ #reorder { INSTANCE_CONTEXT } <- int32 carray (addresses for mapping)
80
+ #order_by { INSTANCE_CONTEXT } <- Array of int32 carray or carray (addresses for mapping)
81
+
82
+ #calculate {|label, column| CALLER_CONTEXT } <- scalar
83
+ #resample {|label, column| CALLER_CONTEXT } <- any carray
@@ -0,0 +1,5 @@
1
+ carray-dataframe
2
+ ================
3
+
4
+
5
+ gem install rsruby
@@ -0,0 +1,25 @@
# Gem specification for carray-dataframe.
#
# Packages every file under the current directory except previously built
# carray*.gem archives, and declares the runtime dependencies
# (carray, axlsx, spreadsheet).
Gem::Specification.new do |s|
  version = "1.0.0"

  # Built gem archives sitting in the source tree must not be re-packaged.
  files = Dir.glob("**/*") - Dir.glob("carray*.gem")

  s.platform    = Gem::Platform::RUBY
  s.name        = "carray-dataframe"
  s.summary     = "Extension for realizing DataFrame of R in Ruby"
  s.description = <<-HERE
    Extension for realizing DataFrame of R in Ruby
  HERE
  s.version     = version
  s.author      = "Hiroki Motoyoshi"
  s.email       = ""
  s.homepage    = 'https://github.com/himotoyoshi/carray-dataframe'
  s.files       = files

  # The packaged API and examples rely on keyword arguments, required keyword
  # arguments and rational literals (e.g. `4r`), all of which need Ruby 2.1+.
  s.required_ruby_version = ">= 2.1.0"

  s.add_runtime_dependency 'carray', '~> 1.1'
  s.add_runtime_dependency 'axlsx', '~> 2.0'
  s.add_runtime_dependency 'spreadsheet', '~> 1.1'
end
@@ -0,0 +1,24 @@
# Nonlinear least-squares example: generate noisy samples of a quadratic,
# fit them with R's nls(), and plot the samples together with the fitted curve.
require "carray"
require "R"

R.run

# 200 sample positions over 0..4 (presumably evenly spaced by CArray#span)
# and a noise term built from CArray#random, shifted by -2.
x = CArray.float(200).span(0..4r)
v = x.random(4)-2

# Coefficients used to generate the synthetic data.
a = 3
b = 5
c = 7
y = a*x**2 + b*x + c + v

# Run the fit in R; x and y are handed over as R variables of the same name.
res = R %{
  nls(y ~ a*x^2 + b*x + c, start=c(a=100,b=1,c=1), trace=TRUE)
}, :x=>x, :y=>y

# Fitted coefficients read back into Ruby.
a1,b1,c1 = R.coef(res).to_ruby.values_at("a","b","c")

CA.gnuplot {
  # The fitted curve has to use all three fitted coefficients; the constant
  # term is c1, not the generating constant c.
  plot [x,y],
       [x,a1*x**2+b1*x+c1, nil, "lines"]
}
@@ -0,0 +1,9 @@
# Plots Sepal_Length against Sepal_Width of the iris data obtained from R.
require "R"

R.run

flowers = R.iris

CA.gnuplot do
  plot [flowers.Sepal_Length, flowers.Sepal_Width]
end
@@ -0,0 +1,30 @@
#
# From https://oku.edu.mie-u.ac.jp/~okumura/stat/100410a.html
#
# Draws the same bar chart twice: first by sending R source text, then by
# issuing the equivalent calls through the block form of R. Each drawing is
# followed by `gets` so the script waits for Enter.
#

require "R"

R.run

region_names = ["北海道","本州","四国","九州","沖縄"].to_ca
region_sizes = [83457,231113,18792,42191,2276].to_ca / 10000.0

# Variant 1: R source passed as a string, data bound by name.
R(%{
  par(family="HiraKakuProN-W3")
  par(las=1)
  par(mgp=c(2,0.8,0))
  barplot(areasize, names.arg=areaname)
  axis(2, labels="面積 (万km^2)", at=20, hadj=0.3, padj=-1, tick=FALSE)
}, areasize: region_sizes, areaname: region_names)

gets

# Variant 2: the same calls written in Ruby inside the R block.
R {
  par(family: "HiraKakuProN-W3")
  par(las: 1)
  par(mgp: [2,0.8,0])
  barplot(region_sizes, "names.arg" => region_names)
  axis(2, labels: "面積 (万km^2)", at: 20, hadj: 0.3, padj: -1, tick: false)
}

gets
@@ -0,0 +1,22 @@
# For n = 1..10, averages n random arrays of one million doubles and plots a
# histogram of the averaged values over 0..1 in steps of 0.01, pausing half a
# second between frames. Waits for Enter at the end.
require "carray"

CA.gnuplot do
  terminal %{ wxt }
  1.upto(10) do |terms|
    samples = 1_000_000
    mean = CArray.double(samples) {0}
    terms.times { mean += CArray.double(samples).random }
    mean /= terms

    frame = CADataFrame.new(x: mean)
    hist  = frame.histogram(:x, CA_DOUBLE(0..1,0.01))

    plot [hist.x, hist.count, nil, "boxes fill solid 0.5 noborder"],
         x: [nil, 0..1],
         title: terms.to_s,
         nopause: true

    sleep 0.5
  end
  gets
end
@@ -0,0 +1,78 @@
# Walk-through of resample, calculate, concat, group_by, pivot and select on a
# small CSV-backed CADataFrame. Every result is printed with `p`.
#
# NOTE: the blocks passed to arrange/table/select refer to columns by bare
# name (v1, v2, name, count, row_number), so no local variable in this script
# may use one of those names.
require "carray"

records = <<CSV
name,v1,v2
A,1,3
B,3,2
C,2,1
B,1,3
C,1,4
A,4,2
B,5,3
C,3,3
C,1,1
C,6,3
C,8,1
A,1,2
CSV

# Parse the CSV (header row + body) and convert v1/v2 to integers.
raw = CADataFrame.from_csv(records) {
  header
  body
}
frame = raw.arrange {
  int :v1, :v2
}

# Resample every column after reshaping it into rows of two values.
p frame.resample { |label, column|
  paired = column.reshape(false,2)
  if "name" == label
    paired[nil,-1]
  else
    paired.max(1)
  end
}

# Column-wise sum and mean, concatenated, with a derived :sum column.
sums  = frame.calculate(:sum)
means = frame.calculate(:mean)
p df = CADataFrame.concat(sums, means).arrange {
  eliminate :name
  append :sum, v1 + v2
}

# Per-name statistics.
p frame.group_by(:name).table {
  {
    count:   row_number,
    v1_sum:  v1.sum,
    v1_mean: v1.mean,
    v2_sum:  v2.sum,
    v2_mean: v2.mean,
  }
}

# Names collected per v2 value.
p frame.group_by(:v2).table {
  {
    count:    row_number,
    namelist: name.sort.join(""),
  }
}

p "--- Pivot"
v1_axis = {:v1=>CA_INT(1..8)}
v2_axis = {:v2=>CA_INT(1..5)}
p frame.pivot(v1_axis, v2_axis).table {
  if name.size > 0
    name.join("")
  else
    "-"
  end
}

# Grouping on two keys at once.
grouped = frame.group_by(:v1,:v2)

p summary = grouped.table {
  {
    count:    row_number,
    namelist: name.sort.join(""),
  }
}

p grouped[[1,3]]

p summary.select { count >= 2 }
@@ -0,0 +1,27 @@
# Plots columns a, b and c of a CSV-backed CADataFrame as a row-stacked
# histogram, using the NAME column for the x tics.
require "carray"

csv_text = <<CSV
name,NAME,a,b,c
u,U,1,2,3
v,V,2,3,4
w,W,5,1,3
x,X,4,3,1
y,Y,1,1,2
z,Z,2,3,1
CSV

# Parse the CSV (header row + body) and convert the value columns to integers.
parsed = CADataFrame.from_csv(csv_text) {
  header
  body
}
frame = parsed.arrange {
  int :a,:b,:c
}

CA.gnuplot do
  set %{ style histogram rowstacked }
  set %{ style fill solid border -1 }
  plot [frame.a, "a", "histogram"],
       [frame.b, "b", "histogram"],
       [frame.c, "c", "histogram"],
       x: ["NAME",nil,nil, histogram_tics(frame.NAME)],
       y: ["VALUE",0..20]
end
@@ -0,0 +1,29 @@
# Loads the iris data from R, prepends an "id" column built from the row
# index, writes the frame to out.xlsx, prints the Species value counts and
# finally round-trips the frame through SQL with an ORDER BY query.
require "carray"
require "R"
require_relative "../lib/carray/dataframe/dataframe"

R.run

iris = R.iris
iris.lead("id", iris.row_index)

p iris

iris.to_xlsx('out.xlsx')

# Column subsets keyed by id (not used further in this script).
petal   = iris[["id", "Petal.Length","Petal.Width"]]
sepal   = iris[["id", "Sepal.Length","Sepal.Width"]]
species = iris[["id", "Species"]]


species_counts = iris["Species"].value_counts
p species_counts

#p d2 = iris.to_daru

sql_table = iris.to_sql("iris")
tbl = sql_table.to_df(%{
  select * from iris order by Sepal_Width desc;
})

p tbl

#tbl.to_xlsx("out.xlsx")
@@ -0,0 +1,23 @@
# Builds a CADataFrame from survey-style CSV and appends two label columns
# that translate the 0/1 answers into "NG"/"OK", once via CArray indexing and
# once via Array#values_at.
require "carray"

survey = <<CSV
id,gender,ans1,ans2
1,F,1,0
2,F,0,0
3,M,1,0
4,M,0,1
5,F,1,1
CSV

parsed = CADataFrame.from_csv(survey) {
  header
  body
}
frame = parsed.arrange {
  int(:id, :ans1, :ans2)
  append(:ans1s, %w[NG OK].to_ca[ans1])
  append(:ans2s, %w[NG OK].values_at(*ans2.to_a))
}

p frame

#p frame.group_by(:gender).calculate(:sum)
@@ -0,0 +1,21 @@
# Demonstrates CADataFrame#matchup: aligning a frame against different key
# arrays for column :a and column :b, printing each result.
require "carray"

frame = CADataFrame.new(
  a: [1,2,3,5,6,7,9,10],
  b: [30,20,20,30,20,10,20,30],
  c: [2,1,1,1,2,1,2,2]
)

first_keys  = CA_INT([1,2,5,6,7,9])
second_keys = CA_INT([1,2,3,4,5,6,7,8,10,11])

p frame
p frame.matchup(:a, first_keys)
matched = frame.matchup(:a, second_keys)

# Presumably replaces masked elements of :b and :c with -9999 -- confirm
# against the unmask implementation.
matched.arrange { unmask(-9999, :b, :c) }

p matched

b_keys = CA_INT([10,20,30])
p frame.matchup(:b, b_keys)
Binary file
@@ -0,0 +1,44 @@
# Exercises the CADataFrame constructors (hash, table + column_names, table
# extended with CA::TableMethods, hash + row_index) followed by the row
# iterators, conversions, transformations and index access. Every result is
# printed with `p`.
require "carray"
require_relative "../lib/carray/dataframe/dataframe"

# From a hash of columns.
columns = { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
df = CADataFrame.new(columns)
p df

# From a transposed object table with explicit column names.
table = CA_OBJECT([[4,5,6,7], [10,20,30,40], [100,50,-30,-50]]).t
df = CADataFrame.new(table, column_names: %w[aaa bbb ccc])
p df

# From a table that carries its own column names.
table.extend(CA::TableMethods)
table.column_names = %w[aaa bbb ccc]
df = CADataFrame.new(table)
p df

# From a hash of columns with an explicit row index.
indexed_columns = { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
df = CADataFrame.new(indexed_columns, row_index: %w[a b c d])
p df

df.each_row(with: Array) do |row|
  p row
end

df.each_row_with_row_index(with: Array) do |row, idx|
  p [row, idx]
end

p df.to_ca.column_names
p df.to_hash
p df.columns_to_hash("bbb","aaa")
p df.columns_to_hash("bbb",["aaa"])
p df.columns_to_hash("bbb",["aaa","ccc"])

p df.add_suffix("_no")
p df.transpose

copy = df.to_df
p copy

p df["aaa"]
view = df[["aaa"]]
p view
#view.detouch!

# Write through the single-column frame, then print both the source frame and
# the earlier to_df copy to show which of them observes the change.
view[0,0] = -1111

p df
p copy