carray-dataframe 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7f93c348e3fd8e166ddba89d5ddc1f0fb86653a6
4
+ data.tar.gz: cb16a824a9e0c2aaf40db3f26328176ef9eb882e
5
+ SHA512:
6
+ metadata.gz: 8f96c5cf91470a732a09aa1e3629b94963d29d786dbfa9768430c18e0ebe1b2219f7256ddc17c783ce8cac9253c19c4cbce9e16d005435f0f718f97c788b832d
7
+ data.tar.gz: b982e8a4b8f162f69bb86fd31d0d92d1d86c7c06b663b9cc091974fe46fcdbf16635e3e793cc91b813fd54cff29e7135416c449ba0089985332dad354d2a18a9
data/API.txt ADDED
@@ -0,0 +1,83 @@
1
+
2
+ ### Constructor
3
+
4
+ CADataFrame.new(columns_or_table, row_index: nil, column_names: nil)
5
+ CADataFrame.new(columns_or_table, row_index: nil, column_names: nil) { ... }
6
+
7
+ If a block is given, arrange is called internally with that block.
8
+
9
+ ### Attributes
10
+
11
+ CADataFrame#column_number
12
+ CADataFrame#column_names
13
+ CADataFrame#column_types
14
+ CADataFrame#columns
15
+ CADataFrame#row_index
16
+ CADataFrame#row_number
17
+
18
+ ### Index Access
19
+
20
+ df[["AAA"]] => CADataFrame containing column "AAA"
21
+ df[["AAA","BBB"]] => CADataFrame containing columns "AAA" and "BBB"
22
+
23
+ df["AAA"] => 1 dimensional CArray
24
+ df[["AAA"]].to_ca => 2 dimensional CArray with column_names
25
+ df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
26
+
27
+
28
+ df[dfmask]
29
+ Returns a new, detached CADataFrame that is masked wherever the value of dfmask equals 1.
30
+
31
+
32
+ ### Iterators
33
+
34
+ CADataFrame#each_column_name { |name| ... }
35
+ CADataFrame#each_column { |name, column| ... }
36
+ CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
37
+ CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
38
+
39
+ ### Transformation
40
+
41
+ CADataFrame#add_suffix(suffix_string) -> CADataFrame
42
+ Add suffix_string to all column names
43
+
44
+ CADataFrame#transpose(column_names: )
45
+
46
+
47
+ ### Conversion
48
+
49
+ CADataFrame#ca -> CADFArray (Reference Array)
50
+ CADataFrame#to_ca -> CArray with CA::TableMethods
51
+ CADataFrame#to_hash -> Hash
52
+ CADataFrame#to_xlsx(with_row_index: false) -> Hash
53
+ Masked elements are converted to "=NA()".
54
+
55
+ CADataFrame#columns_to_hash(key_name, *value_names)
56
+
57
+ ex) df.columns_to_hash("bbb",["aaa","ccc"])
58
+
59
+ ---------------
60
+ aaa bbb ccc
61
+ ---------------
62
+ 4 10 100
63
+ 5 20 50
64
+ 6 30 -30
65
+ 7 40 -50
66
+ ---------------
67
+
68
+ {10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
69
+
70
+
71
+
72
+
73
+ CADataFrame
74
+
75
+ #append(name) { INSTANCE_CONTEXT } <- any carray
76
+ #lead(name) { INSTANCE_CONTEXT } <- any carray
77
+ #execute { INSTANCE_CONTEXT } => any object
78
+ #select(name...) { INSTANCE_CONTEXT } <- boolean carray
79
+ #reorder { INSTANCE_CONTEXT } <- int32 carray (addresses for mapping)
80
+ #order_by { INSTANCE_CONTEXT } <- Array of int32 carray or carray (addresses for mapping)
81
+
82
+ #calculate {|label, column| CALLER_CONTEXT } <- scalar
83
+ #resample {|label, column| CALLER_CONTEXT } <- any carray
@@ -0,0 +1,5 @@
1
+ carray-dataframe
2
+ ================
3
+
4
+
5
+ gem install rsruby
@@ -0,0 +1,25 @@
1
+
2
+ Gem::Specification::new do |s|
3
+ version = "1.0.0"
4
+
5
+ files = Dir.glob("**/*") - [
6
+ Dir.glob("carray*.gem"),
7
+ ].flatten
8
+
9
+ s.platform = Gem::Platform::RUBY
10
+ s.name = "carray-dataframe"
11
+ s.summary = "Extension for realizing DataFrame of R in Ruby"
12
+ s.description = <<-HERE
13
+ Extension for realizing DataFrame of R in Ruby
14
+ HERE
15
+ s.version = version
16
+ s.author = "Hiroki Motoyoshi"
17
+ s.email = ""
18
+ s.homepage = 'https://github.com/himotoyoshi/carray-dataframe'
19
+ s.files = files
20
+ s.has_rdoc = false
21
+ s.required_ruby_version = ">= 1.8.1"
22
+ s.add_runtime_dependency 'carray', '~> 1.1'
23
+ s.add_runtime_dependency 'axlsx', '~> 2.0'
24
+ s.add_runtime_dependency 'spreadsheet', '~> 1.1'
25
+ end
@@ -0,0 +1,24 @@
1
+ require "carray"
2
+ require "R"
3
+
4
+ R.run
5
+
6
+ x = CArray.float(200).span(0..4r)
7
+ v = x.random(4)-2
8
+
9
+ a = 3
10
+ b = 5
11
+ c = 7
12
+ y = a*x**2 + b*x + c + v
13
+
14
+ res = R %{
15
+ nls(y ~ a*x^2 + b*x + c, start=c(a=100,b=1,c=1), trace=TRUE)
16
+ }, :x=>x, :y=>y
17
+
18
+ a1,b1,c1 = R.coef(res).to_ruby.values_at("a","b","c")
19
+
20
+ CA.gnuplot {
21
+ plot [x,y],
22
+ [x,a1*x**2+b1*x+c, nil, "lines"]
23
+ }
24
+
@@ -0,0 +1,9 @@
1
+ require "R"
2
+ # Example: plot the Sepal_Length and Sepal_Width columns of R.iris through CA.gnuplot.
3
+ R.run
4
+
5
+ iris = R.iris
6
+
7
+ CA.gnuplot {
8
+ plot [iris.Sepal_Length, iris.Sepal_Width]  # one data set built from the two columns
9
+ }
@@ -0,0 +1,30 @@
1
+ #
2
+ # From https://oku.edu.mie-u.ac.jp/~okumura/stat/100410a.html
3
+ #
4
+ # Example: draws the same bar chart twice, first from an R source string, then through the R { ... } block form.
5
+ require "R"
6
+
7
+ R.run
8
+
9
+ areaname = ["北海道","本州","四国","九州","沖縄"].to_ca
10
+ areasize = [83457,231113,18792,42191,2276].to_ca / 10000.0  # divided by 10000.0; the axis label below is given in 万km^2 (10,000 km^2)
11
+
12
+ R %{
13
+ par(family="HiraKakuProN-W3")
14
+ par(las=1)
15
+ par(mgp=c(2,0.8,0))
16
+ barplot(areasize, names.arg=areaname)
17
+ axis(2, labels="面積 (万km^2)", at=20, hadj=0.3, padj=-1, tick=FALSE)
18
+ }, :areasize=>areasize, :areaname=>areaname  # presumably exposes these Ruby values to the R code under the same names -- TODO confirm
19
+
20
+ gets  # wait for a line on stdin before continuing (presumably so the plot can be inspected)
21
+
22
+ R {
23
+ par :family=>"HiraKakuProN-W3"
24
+ par :las=>1
25
+ par :mgp=>[2,0.8,0]
26
+ barplot areasize, "names.arg"=>areaname
27
+ axis 2, :labels=>"面積 (万km^2)", :at=>20, :hadj=>0.3, :padj=>-1, :tick=>false
28
+ }
29
+
30
+ gets
@@ -0,0 +1,22 @@
1
+ require "carray"
2
+ # Example: for n = 1..10, plot a histogram of the element-wise mean of n random arrays of 1,000,000 values.
3
+ CA.gnuplot {
4
+ terminal %{ wxt }
5
+ (1..10).each do |n|
6
+ x = CArray.double(1000000) {0}
7
+ n.times do
8
+ x += CArray.double(1000000).random
9
+ end
10
+ x = x/n  # element-wise mean of the n random arrays
11
+ df = CADataFrame.new(:x=>x)
12
+ h = df.histogram(:x, CA_DOUBLE(0..1,0.01))  # presumably bins x over 0..1 in steps of 0.01 -- TODO confirm CA_DOUBLE range semantics
13
+
14
+ plot [h.x, h.count, nil, "boxes fill solid 0.5 noborder"],
15
+ :x=>[nil, 0..1],
16
+ :title=>n.to_s,
17
+ :nopause=>true
18
+
19
+ sleep 0.5  # pause half a second between successive plots
20
+ end
21
+ gets
22
+ }
@@ -0,0 +1,78 @@
1
+ require "carray"
2
+ # Example: load a small CSV into a CADataFrame, then print resample, calculate, group_by and pivot results.
3
+ csv =<<HERE
4
+ name,v1,v2
5
+ A,1,3
6
+ B,3,2
7
+ C,2,1
8
+ B,1,3
9
+ C,1,4
10
+ A,4,2
11
+ B,5,3
12
+ C,3,3
13
+ C,1,1
14
+ C,6,3
15
+ C,8,1
16
+ A,1,2
17
+ HERE
18
+ # Read the CSV text (header + body), then apply `int` to columns v1 and v2 (presumably an integer conversion -- TODO confirm).
19
+ f = CADataFrame.from_csv(csv) {
20
+ header
21
+ body
22
+ }.arrange {
23
+ int :v1, :v2
24
+ }
25
+ # resample: the block receives each column's label and data (see data/API.txt) and returns the replacement column.
26
+ p f.resample { |l, c|
27
+ c = c.reshape(false,2)  # presumably regroups the column into rows of 2 values -- TODO confirm reshape(false, 2) semantics
28
+ case l
29
+ when "name"
30
+ c[nil,-1]
31
+ else
32
+ c.max(1)
33
+ end
34
+ }
35
+ # Concatenate the :sum and :mean results of calculate; inside arrange, eliminate :name and append a :sum column computed as v1 + v2.
36
+ p df = CADataFrame.concat(f.calculate(:sum),
37
+ f.calculate(:mean)).arrange {
38
+ eliminate :name
39
+ append :sum, v1 + v2
40
+ }
41
+
42
+ p f.group_by(:name).table {
43
+ {
44
+ :count => row_number,
45
+ :v1_sum => v1.sum,
46
+ :v1_mean => v1.mean,
47
+ :v2_sum => v2.sum,
48
+ :v2_mean => v2.mean,
49
+ }
50
+ }
51
+
52
+ p f.group_by(:v2).table {
53
+ {
54
+ :count => row_number,
55
+ :namelist => name.sort.join(""),
56
+ }
57
+ }
58
+
59
+ p "--- Pivot"
60
+ p f.pivot({:v1=>CA_INT(1..8)},{:v2=>CA_INT(1..5)}).table {
61
+ name.size > 0 ? name.join("") : "-"  # "-" when no names fall in the cell, otherwise the names joined together
62
+ }
63
+
64
+ g = f.group_by(:v1,:v2)
65
+
66
+ p t = g.table {
67
+ {
68
+ :count => row_number,
69
+ :namelist => name.sort.join(""),
70
+ }
71
+ }
72
+
73
+ p g[[1,3]]  # presumably the group whose (v1, v2) key is (1, 3) -- TODO confirm
74
+
75
+ p t.select { count >= 2 }
76
+
77
+
78
+
@@ -0,0 +1,27 @@
1
+ require "carray"
2
+ text = <<EOS
3
+ name,NAME,a,b,c
4
+ u,U,1,2,3
5
+ v,V,2,3,4
6
+ w,W,5,1,3
7
+ x,X,4,3,1
8
+ y,Y,1,1,2
9
+ z,Z,2,3,1
10
+ EOS
11
+ # Example: plot columns a, b and c of a CSV-backed data frame with gnuplot's "histogram rowstacked" style.
12
+ df = CADataFrame.from_csv(text) {
13
+ header
14
+ body
15
+ }.arrange {
16
+ int :a,:b,:c
17
+ }
18
+
19
+ CA.gnuplot {
20
+ set %{ style histogram rowstacked }
21
+ set %{ style fill solid border -1 }
22
+ plot [df.a, "a", "histogram"],
23
+ [df.b, "b", "histogram"],
24
+ [df.c, "c", "histogram"],
25
+ :x=>["NAME",nil,nil, histogram_tics(df.NAME)],  # x-axis entry built from the NAME column via histogram_tics
26
+ :y=>["VALUE",0..20]
27
+ }
@@ -0,0 +1,29 @@
1
+ require "carray"
2
+ require "R"
3
+ require_relative "../lib/carray/dataframe/dataframe"
4
+ # Example: take R.iris as a data frame, write it to out.xlsx, count Species values and run an SQL query on it.
5
+ R.run
6
+
7
+ df = R.iris
8
+ df.lead "id", df.row_index  # presumably inserts the row index as a leading "id" column (data/API.txt lists #lead(name)) -- TODO confirm
9
+
10
+ p df
11
+
12
+ df.to_xlsx('out.xlsx')
13
+ # Column subsets selected by name; df[[...]] yields a CADataFrame (see data/API.txt). These locals are not used further in this script.
14
+ petal = df[["id", "Petal.Length","Petal.Width"]]
15
+ sepal = df[["id", "Sepal.Length","Sepal.Width"]]
16
+ species = df[["id", "Species"]]
17
+
18
+
19
+ p df["Species"].value_counts
20
+
21
+ #p d2 = df.to_daru
22
+ # Round-trip through to_sql("iris") and to_df with a query ordering by Sepal_Width descending.
23
+ tbl = df.to_sql("iris").to_df %{
24
+ select * from iris order by Sepal_Width desc;
25
+ }
26
+
27
+ p tbl
28
+
29
+ #tbl.to_xlsx("out.xlsx")
@@ -0,0 +1,23 @@
1
+ require "carray"
2
+ # Example: derive "NG"/"OK" label columns from the 0/1 answer columns in two different ways.
3
+ csv =<<CSV
4
+ id,gender,ans1,ans2
5
+ 1,F,1,0
6
+ 2,F,0,0
7
+ 3,M,1,0
8
+ 4,M,0,1
9
+ 5,F,1,1
10
+ CSV
11
+
12
+ df = CADataFrame.from_csv(csv) {
13
+ header
14
+ body
15
+ }.arrange {
16
+ int :id,:ans1,:ans2
17
+ append :ans1s, ["NG","OK"].to_ca[ans1]  # index a CArray of labels with the ans1 column
18
+ append :ans2s, ["NG","OK"].values_at(*ans2.to_a)  # same lookup via Array#values_at on ans2 converted to a plain Array
19
+ }
20
+
21
+ p df
22
+
23
+ #p df.group_by(:gender).calculate(:sum)
@@ -0,0 +1,21 @@
1
+ require "carray"
2
+ # Example: CADataFrame#matchup of a column against reference key arrays.
3
+ df = CADataFrame.new a: [1,2,3,5,6,7,9,10],
4
+ b: [30,20,20,30,20,10,20,30],
5
+ c: [2,1,1,1,2,1,2,2]
6
+
7
+ a1 = CA_INT([1,2,5,6,7,9])  # every value here also occurs in column a
8
+ a2 = CA_INT([1,2,3,4,5,6,7,8,10,11])  # includes 4, 8 and 11, which do not occur in column a
9
+
10
+ p df
11
+ p df.matchup(:a, a1)
12
+ df2 = df.matchup(:a, a2)
13
+
14
+ df2.arrange {
15
+ unmask -9999, :b, :c  # presumably fills masked elements of b and c with -9999 -- TODO confirm unmask semantics
16
+ }
17
+
18
+ p df2
19
+
20
+ b1 = CA_INT([10,20,30])
21
+ p df.matchup(:b, b1)
Binary file
@@ -0,0 +1,44 @@
1
+ require "carray"
2
+ require_relative "../lib/carray/dataframe/dataframe"
3
+ # Smoke script: builds CADataFrame objects from several kinds of input and prints the results of the API calls below.
4
+ hash = { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
5
+ p df = CADataFrame.new(hash)
6
+ # The same values supplied as a CA_OBJECT table plus an explicit column_names: option.
7
+ table = CA_OBJECT([[4,5,6,7], [10,20,30,40], [100,50,-30,-50]]).t
8
+ p df = CADataFrame.new(table, column_names: ['aaa','bbb','ccc'])
9
+ # The same table again, with column names attached through CA::TableMethods instead of the option.
10
+ table.extend(CA::TableMethods)
11
+ table.column_names = ['aaa','bbb','ccc']
12
+ p df = CADataFrame.new(table)
13
+
14
+ hash = { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
15
+ p df = CADataFrame.new(hash, row_index: ["a","b","c","d"])
16
+
17
+ df.each_row(with: Array) {|row|
18
+ p row
19
+ }
20
+
21
+ df.each_row_with_row_index(with: Array) {|row,i|
22
+ p [row,i]
23
+ }
24
+
25
+ p df.to_ca.column_names
26
+ p df.to_hash
27
+ p df.columns_to_hash("bbb","aaa")
28
+ p df.columns_to_hash("bbb",["aaa"])
29
+ p df.columns_to_hash("bbb",["aaa","ccc"])
30
+
31
+ p df.add_suffix("_no")
32
+ p df.transpose
33
+
34
+ p df2 = df.to_df
35
+
36
+ p df["aaa"]
37
+ p b = df[["aaa"]]
38
+ #b.detouch!
39
+ # Write through the single-column frame b, then print df and df2 (presumably to show whether they share data with b).
40
+ b[0,0] = -1111
41
+
42
+ p df
43
+ p df2
44
+