carray-dataframe 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/API.txt +83 -0
- data/README.md +5 -0
- data/carray-dataframe.gemspec +25 -0
- data/examples/R/fit.rb +24 -0
- data/examples/R/iris.rb +9 -0
- data/examples/R/japan_area.rb +30 -0
- data/examples/R/kyaku.rb +22 -0
- data/examples/group_by.rb +78 -0
- data/examples/hist.rb +27 -0
- data/examples/iris.rb +29 -0
- data/examples/map.rb +23 -0
- data/examples/match.rb +21 -0
- data/examples/test.xlsx +0 -0
- data/examples/test1.rb +44 -0
- data/examples/test2.rb +14 -0
- data/examples/test3.db +0 -0
- data/examples/test3.rb +11 -0
- data/examples/test3.xlsx +0 -0
- data/examples/to_excel.rb +27 -0
- data/lib/R.rb +365 -0
- data/lib/carray/autoload/autoload_dataframe_dataframe.rb +26 -0
- data/lib/carray/dataframe/dataframe.rb +1640 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 7f93c348e3fd8e166ddba89d5ddc1f0fb86653a6
|
4
|
+
data.tar.gz: cb16a824a9e0c2aaf40db3f26328176ef9eb882e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8f96c5cf91470a732a09aa1e3629b94963d29d786dbfa9768430c18e0ebe1b2219f7256ddc17c783ce8cac9253c19c4cbce9e16d005435f0f718f97c788b832d
|
7
|
+
data.tar.gz: b982e8a4b8f162f69bb86fd31d0d92d1d86c7c06b663b9cc091974fe46fcdbf16635e3e793cc91b813fd54cff29e7135416c449ba0089985332dad354d2a18a9
|
data/API.txt
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
|
2
|
+
### Constructor
|
3
|
+
|
4
|
+
CADataFrame.new(columns_or_table, row_index: nil, column_names: nil)
|
5
|
+
CADataFrame.new(columns_or_table, row_index: nil, column_names: nil) { ... }
|
6
|
+
|
7
|
+
If a block is specified, arrange is called internally with the block.
|
8
|
+
|
9
|
+
### Attributes
|
10
|
+
|
11
|
+
CADataFrame#column_number
|
12
|
+
CADataFrame#column_names
|
13
|
+
CADataFrame#column_types
|
14
|
+
CADataFrame#columns
|
15
|
+
CADataFrame#row_index
|
16
|
+
CADataFrame#row_number
|
17
|
+
|
18
|
+
### Index Access
|
19
|
+
|
20
|
+
df[["AAA"]] => CADataFrame including column "AAA"
|
21
|
+
df[["AAA","BBB"]] => CADataFrame including columns "AAA", "BBB"
|
22
|
+
|
23
|
+
df["AAA"] => 1 dimensional CArray
|
24
|
+
df[["AAA"]].to_ca => 2 dimensional CArray with column_names
|
25
|
+
df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
|
26
|
+
|
27
|
+
|
28
|
+
df[dfmask]
|
29
|
+
returns a new detached CADataFrame masked where dfmask's value equals 1
|
30
|
+
|
31
|
+
|
32
|
+
### Iterators
|
33
|
+
|
34
|
+
CADataFrame#each_column_name { |name| ... }
|
35
|
+
CADataFrame#each_column { |name, column| ... }
|
36
|
+
CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
|
37
|
+
CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
|
38
|
+
|
39
|
+
### Transformation
|
40
|
+
|
41
|
+
CADataFrame#add_suffix(suffix_string) -> CADataFrame
|
42
|
+
Add suffix_string to all column names
|
43
|
+
|
44
|
+
CADataFrame#transpose(column_names: )
|
45
|
+
|
46
|
+
|
47
|
+
### Conversion
|
48
|
+
|
49
|
+
CADataFrame#ca -> CADFArray (Reference Array)
|
50
|
+
CADataFrame#to_ca -> CArray with CA::TableMethods
|
51
|
+
CADataFrame#to_hash -> Hash
|
52
|
+
CADataFrame#to_xlsx(with_row_index: false) -> Hash
|
53
|
+
Masked elements are converted to "=NA()"
|
54
|
+
|
55
|
+
CADataFrame#columns_to_hash(key_name, *value_names)
|
56
|
+
|
57
|
+
ex) df.columns_to_hash("bbb",["aaa","ccc"])
|
58
|
+
|
59
|
+
---------------
|
60
|
+
aaa bbb ccc
|
61
|
+
---------------
|
62
|
+
4 10 100
|
63
|
+
5 20 50
|
64
|
+
6 30 -30
|
65
|
+
7 40 -50
|
66
|
+
---------------
|
67
|
+
|
68
|
+
{10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
CADataFrame
|
74
|
+
|
75
|
+
#append(name) { INSTANCE_CONTEXT } <- any carray
|
76
|
+
#lead(name) { INSTANCE_CONTEXT } <- any carray
|
77
|
+
#execute { INSTANCE_CONTEXT } => any object
|
78
|
+
#select(name...) { INSTANCE_CONTEXT } <- boolean carray
|
79
|
+
#reorder { INSTANCE_CONTEXT } <- int32 carray (addresses for mapping)
|
80
|
+
#order_by { INSTANCE_CONTEXT } <- Array of int32 carray or carray (addresses for mapping)
|
81
|
+
|
82
|
+
#calculate {|label, column| CALLER_CONTEXT } <- scalar
|
83
|
+
#resample {|label, column| CALLER_CONTEXT } <- any carray
|
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
Gem::Specification::new do |s|
|
3
|
+
version = "1.0.0"
|
4
|
+
|
5
|
+
files = Dir.glob("**/*") - [
|
6
|
+
Dir.glob("carray*.gem"),
|
7
|
+
].flatten
|
8
|
+
|
9
|
+
s.platform = Gem::Platform::RUBY
|
10
|
+
s.name = "carray-dataframe"
|
11
|
+
s.summary = "Extension for realizing DataFrame of R in Ruby"
|
12
|
+
s.description = <<-HERE
|
13
|
+
Extension for realizing DataFrame of R in Ruby
|
14
|
+
HERE
|
15
|
+
s.version = version
|
16
|
+
s.author = "Hiroki Motoyoshi"
|
17
|
+
s.email = ""
|
18
|
+
s.homepage = 'https://github.com/himotoyoshi/carray-dataframe'
|
19
|
+
s.files = files
|
20
|
+
s.has_rdoc = false
|
21
|
+
s.required_ruby_version = ">= 1.8.1"
|
22
|
+
s.add_runtime_dependency 'carray', '~> 1.1'
|
23
|
+
s.add_runtime_dependency 'axlsx', '~> 2.0'
|
24
|
+
s.add_runtime_dependency 'spreadsheet', '~> 1.1'
|
25
|
+
end
|
data/examples/R/fit.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require "carray"
|
2
|
+
require "R"
|
3
|
+
|
4
|
+
R.run
|
5
|
+
|
6
|
+
x = CArray.float(200).span(0..4r)
|
7
|
+
v = x.random(4)-2
|
8
|
+
|
9
|
+
a = 3
|
10
|
+
b = 5
|
11
|
+
c = 7
|
12
|
+
y = a*x**2 + b*x + c + v
|
13
|
+
|
14
|
+
res = R %{
|
15
|
+
nls(y ~ a*x^2 + b*x + c, start=c(a=100,b=1,c=1), trace=TRUE)
|
16
|
+
}, :x=>x, :y=>y
|
17
|
+
|
18
|
+
a1,b1,c1 = R.coef(res).to_ruby.values_at("a","b","c")
|
19
|
+
|
20
|
+
CA.gnuplot {
|
21
|
+
plot [x,y],
|
22
|
+
[x,a1*x**2+b1*x+c1, nil, "lines"] # fitted curve: use the fitted coefficient c1, not the true constant c
|
23
|
+
}
|
24
|
+
|
data/examples/R/iris.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# From https://oku.edu.mie-u.ac.jp/~okumura/stat/100410a.html
|
3
|
+
#
|
4
|
+
|
5
|
+
require "R"
|
6
|
+
|
7
|
+
R.run
|
8
|
+
|
9
|
+
areaname = ["北海道","本州","四国","九州","沖縄"].to_ca
|
10
|
+
areasize = [83457,231113,18792,42191,2276].to_ca / 10000.0
|
11
|
+
|
12
|
+
R %{
|
13
|
+
par(family="HiraKakuProN-W3")
|
14
|
+
par(las=1)
|
15
|
+
par(mgp=c(2,0.8,0))
|
16
|
+
barplot(areasize, names.arg=areaname)
|
17
|
+
axis(2, labels="面積 (万km^2)", at=20, hadj=0.3, padj=-1, tick=FALSE)
|
18
|
+
}, :areasize=>areasize, :areaname=>areaname
|
19
|
+
|
20
|
+
gets
|
21
|
+
|
22
|
+
R {
|
23
|
+
par :family=>"HiraKakuProN-W3"
|
24
|
+
par :las=>1
|
25
|
+
par :mgp=>[2,0.8,0]
|
26
|
+
barplot areasize, "names.arg"=>areaname
|
27
|
+
axis 2, :labels=>"面積 (万km^2)", :at=>20, :hadj=>0.3, :padj=>-1, :tick=>false
|
28
|
+
}
|
29
|
+
|
30
|
+
gets
|
data/examples/R/kyaku.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require "carray"
|
2
|
+
|
3
|
+
CA.gnuplot {
|
4
|
+
terminal %{ wxt }
|
5
|
+
(1..10).each do |n|
|
6
|
+
x = CArray.double(1000000) {0}
|
7
|
+
n.times do
|
8
|
+
x += CArray.double(1000000).random
|
9
|
+
end
|
10
|
+
x = x/n
|
11
|
+
df = CADataFrame.new(:x=>x)
|
12
|
+
h = df.histogram(:x, CA_DOUBLE(0..1,0.01))
|
13
|
+
|
14
|
+
plot [h.x, h.count, nil, "boxes fill solid 0.5 noborder"],
|
15
|
+
:x=>[nil, 0..1],
|
16
|
+
:title=>n.to_s,
|
17
|
+
:nopause=>true
|
18
|
+
|
19
|
+
sleep 0.5
|
20
|
+
end
|
21
|
+
gets
|
22
|
+
}
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require "carray"
|
2
|
+
|
3
|
+
csv =<<HERE
|
4
|
+
name,v1,v2
|
5
|
+
A,1,3
|
6
|
+
B,3,2
|
7
|
+
C,2,1
|
8
|
+
B,1,3
|
9
|
+
C,1,4
|
10
|
+
A,4,2
|
11
|
+
B,5,3
|
12
|
+
C,3,3
|
13
|
+
C,1,1
|
14
|
+
C,6,3
|
15
|
+
C,8,1
|
16
|
+
A,1,2
|
17
|
+
HERE
|
18
|
+
|
19
|
+
f = CADataFrame.from_csv(csv) {
|
20
|
+
header
|
21
|
+
body
|
22
|
+
}.arrange {
|
23
|
+
int :v1, :v2
|
24
|
+
}
|
25
|
+
|
26
|
+
p f.resample { |l, c|
|
27
|
+
c = c.reshape(false,2)
|
28
|
+
case l
|
29
|
+
when "name"
|
30
|
+
c[nil,-1]
|
31
|
+
else
|
32
|
+
c.max(1)
|
33
|
+
end
|
34
|
+
}
|
35
|
+
|
36
|
+
p df = CADataFrame.concat(f.calculate(:sum),
|
37
|
+
f.calculate(:mean)).arrange {
|
38
|
+
eliminate :name
|
39
|
+
append :sum, v1 + v2
|
40
|
+
}
|
41
|
+
|
42
|
+
p f.group_by(:name).table {
|
43
|
+
{
|
44
|
+
:count => row_number,
|
45
|
+
:v1_sum => v1.sum,
|
46
|
+
:v1_mean => v1.mean,
|
47
|
+
:v2_sum => v2.sum,
|
48
|
+
:v2_mean => v2.mean,
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
p f.group_by(:v2).table {
|
53
|
+
{
|
54
|
+
:count => row_number,
|
55
|
+
:namelist => name.sort.join(""),
|
56
|
+
}
|
57
|
+
}
|
58
|
+
|
59
|
+
p "--- Pivot"
|
60
|
+
p f.pivot({:v1=>CA_INT(1..8)},{:v2=>CA_INT(1..5)}).table {
|
61
|
+
name.size > 0 ? name.join("") : "-"
|
62
|
+
}
|
63
|
+
|
64
|
+
g = f.group_by(:v1,:v2)
|
65
|
+
|
66
|
+
p t = g.table {
|
67
|
+
{
|
68
|
+
:count => row_number,
|
69
|
+
:namelist => name.sort.join(""),
|
70
|
+
}
|
71
|
+
}
|
72
|
+
|
73
|
+
p g[[1,3]]
|
74
|
+
|
75
|
+
p t.select { count >= 2 }
|
76
|
+
|
77
|
+
|
78
|
+
|
data/examples/hist.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require "carray"
|
2
|
+
text = <<EOS
|
3
|
+
name,NAME,a,b,c
|
4
|
+
u,U,1,2,3
|
5
|
+
v,V,2,3,4
|
6
|
+
w,W,5,1,3
|
7
|
+
x,X,4,3,1
|
8
|
+
y,Y,1,1,2
|
9
|
+
z,Z,2,3,1
|
10
|
+
EOS
|
11
|
+
|
12
|
+
df = CADataFrame.from_csv(text) {
|
13
|
+
header
|
14
|
+
body
|
15
|
+
}.arrange {
|
16
|
+
int :a,:b,:c
|
17
|
+
}
|
18
|
+
|
19
|
+
CA.gnuplot {
|
20
|
+
set %{ style histogram rowstacked }
|
21
|
+
set %{ style fill solid border -1 }
|
22
|
+
plot [df.a, "a", "histogram"],
|
23
|
+
[df.b, "b", "histogram"],
|
24
|
+
[df.c, "c", "histogram"],
|
25
|
+
:x=>["NAME",nil,nil, histogram_tics(df.NAME)],
|
26
|
+
:y=>["VALUE",0..20]
|
27
|
+
}
|
data/examples/iris.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require "carray"
|
2
|
+
require "R"
|
3
|
+
require_relative "../lib/carray/dataframe/dataframe"
|
4
|
+
|
5
|
+
R.run
|
6
|
+
|
7
|
+
df = R.iris
|
8
|
+
df.lead "id", df.row_index
|
9
|
+
|
10
|
+
p df
|
11
|
+
|
12
|
+
df.to_xlsx('out.xlsx')
|
13
|
+
|
14
|
+
petal = df[["id", "Petal.Length","Petal.Width"]]
|
15
|
+
sepal = df[["id", "Sepal.Length","Sepal.Width"]]
|
16
|
+
species = df[["id", "Species"]]
|
17
|
+
|
18
|
+
|
19
|
+
p df["Species"].value_counts
|
20
|
+
|
21
|
+
#p d2 = df.to_daru
|
22
|
+
|
23
|
+
tbl = df.to_sql("iris").to_df %{
|
24
|
+
select * from iris order by Sepal_Width desc;
|
25
|
+
}
|
26
|
+
|
27
|
+
p tbl
|
28
|
+
|
29
|
+
#tbl.to_xlsx("out.xlsx")
|
data/examples/map.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require "carray"
|
2
|
+
|
3
|
+
csv =<<CSV
|
4
|
+
id,gender,ans1,ans2
|
5
|
+
1,F,1,0
|
6
|
+
2,F,0,0
|
7
|
+
3,M,1,0
|
8
|
+
4,M,0,1
|
9
|
+
5,F,1,1
|
10
|
+
CSV
|
11
|
+
|
12
|
+
df = CADataFrame.from_csv(csv) {
|
13
|
+
header
|
14
|
+
body
|
15
|
+
}.arrange {
|
16
|
+
int :id,:ans1,:ans2
|
17
|
+
append :ans1s, ["NG","OK"].to_ca[ans1]
|
18
|
+
append :ans2s, ["NG","OK"].values_at(*ans2.to_a)
|
19
|
+
}
|
20
|
+
|
21
|
+
p df
|
22
|
+
|
23
|
+
#p df.group_by(:gender).calculate(:sum)
|
data/examples/match.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "carray"
|
2
|
+
|
3
|
+
df = CADataFrame.new a: [1,2,3,5,6,7,9,10],
|
4
|
+
b: [30,20,20,30,20,10,20,30],
|
5
|
+
c: [2,1,1,1,2,1,2,2]
|
6
|
+
|
7
|
+
a1 = CA_INT([1,2,5,6,7,9])
|
8
|
+
a2 = CA_INT([1,2,3,4,5,6,7,8,10,11])
|
9
|
+
|
10
|
+
p df
|
11
|
+
p df.matchup(:a, a1)
|
12
|
+
df2 = df.matchup(:a, a2)
|
13
|
+
|
14
|
+
df2.arrange {
|
15
|
+
unmask -9999, :b, :c
|
16
|
+
}
|
17
|
+
|
18
|
+
p df2
|
19
|
+
|
20
|
+
b1 = CA_INT([10,20,30])
|
21
|
+
p df.matchup(:b, b1)
|
data/examples/test.xlsx
ADDED
Binary file
|
data/examples/test1.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require "carray"
|
2
|
+
require_relative "../lib/carray/dataframe/dataframe"
|
3
|
+
|
4
|
+
hash = { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
|
5
|
+
p df = CADataFrame.new(hash)
|
6
|
+
|
7
|
+
table = CA_OBJECT([[4,5,6,7], [10,20,30,40], [100,50,-30,-50]]).t
|
8
|
+
p df = CADataFrame.new(table, column_names: ['aaa','bbb','ccc'])
|
9
|
+
|
10
|
+
table.extend(CA::TableMethods)
|
11
|
+
table.column_names = ['aaa','bbb','ccc']
|
12
|
+
p df = CADataFrame.new(table)
|
13
|
+
|
14
|
+
hash = { 'aaa' => [4,5,6,7], 'bbb' => [10,20,30,40], 'ccc' => [100,50,-30,-50] }
|
15
|
+
p df = CADataFrame.new(hash, row_index: ["a","b","c","d"])
|
16
|
+
|
17
|
+
df.each_row(with: Array) {|row|
|
18
|
+
p row
|
19
|
+
}
|
20
|
+
|
21
|
+
df.each_row_with_row_index(with: Array) {|row,i|
|
22
|
+
p [row,i]
|
23
|
+
}
|
24
|
+
|
25
|
+
p df.to_ca.column_names
|
26
|
+
p df.to_hash
|
27
|
+
p df.columns_to_hash("bbb","aaa")
|
28
|
+
p df.columns_to_hash("bbb",["aaa"])
|
29
|
+
p df.columns_to_hash("bbb",["aaa","ccc"])
|
30
|
+
|
31
|
+
p df.add_suffix("_no")
|
32
|
+
p df.transpose
|
33
|
+
|
34
|
+
p df2 = df.to_df
|
35
|
+
|
36
|
+
p df["aaa"]
|
37
|
+
p b = df[["aaa"]]
|
38
|
+
#b.detouch!
|
39
|
+
|
40
|
+
b[0,0] = -1111
|
41
|
+
|
42
|
+
p df
|
43
|
+
p df2
|
44
|
+
|