carray-dataframe 1.0.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/API.txt +1 -7
- data/README.md +3 -1
- data/Rakefile +11 -0
- data/carray-dataframe.gemspec +8 -6
- data/lib/carray-dataframe.rb +13 -0
- data/lib/carray-dataframe/arranger.rb +209 -0
- data/lib/carray-dataframe/cadf_array.rb +106 -0
- data/lib/carray-dataframe/converter.rb +97 -0
- data/lib/carray-dataframe/dataframe.rb +1279 -0
- data/lib/carray-dataframe/group.rb +199 -0
- data/lib/carray-dataframe/iloc_accessor.rb +62 -0
- data/lib/carray-dataframe/io.rb +96 -0
- data/lib/carray-dataframe/join.rb +283 -0
- data/lib/carray-dataframe/loc_accessor.rb +145 -0
- data/lib/carray-dataframe/pivot.rb +54 -0
- data/lib/carray-dataframe/reference.rb +142 -0
- data/lib/carray-dataframe/to_html.rb +102 -0
- metadata +23 -28
- data/examples/R/fit.rb +0 -24
- data/examples/R/iris.rb +0 -9
- data/examples/R/japan_area.rb +0 -30
- data/examples/R/kyaku.rb +0 -22
- data/examples/group_by.rb +0 -78
- data/examples/hist.rb +0 -27
- data/examples/iris.rb +0 -29
- data/examples/map.rb +0 -23
- data/examples/match.rb +0 -21
- data/examples/test.xlsx +0 -0
- data/examples/test1.rb +0 -44
- data/examples/test2.rb +0 -14
- data/examples/test3.db +0 -0
- data/examples/test3.rb +0 -11
- data/examples/test3.xlsx +0 -0
- data/examples/to_excel.rb +0 -27
- data/lib/R.rb +0 -365
- data/lib/carray/autoload/autoload_dataframe_dataframe.rb +0 -26
- data/lib/carray/dataframe/dataframe.rb +0 -1640
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c0561562238eb969944e6e7977f3cdc4edbe3370187ea1f7ba01a2f6af60bf40
|
4
|
+
data.tar.gz: 738a716a1b5055fefb8b182dd26a3a9ac559563072b02df151bf16eab652f245
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 320b6326fb40b111bc601c38d0e2a5a917434dedfd96f9830c4d24dd66df507d7450d30027829f2a832c0673c69bbe20cb7bb5201adad4cc41728505ecbea9d3
|
7
|
+
data.tar.gz: 4bd51f6c826a561dbb108aa57988520ca870fc02d8615babaa65644ca4869dd781b48c6015b19ba5235943fa7d793d0789ced918ae3675b9f4ae3aef0eb0aaf7
|
data/API.txt
CHANGED
@@ -24,16 +24,14 @@ df["AAA"] => 1 dimensional CArray
|
|
24
24
|
df[["AAA"]].to_ca => 2 dimensional CArray with column_names
|
25
25
|
df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
|
26
26
|
|
27
|
-
|
28
27
|
df[dfmask]
|
29
28
|
Returns a new detached CADataFrame, masked where dfmask's value equals 1
|
30
29
|
|
31
|
-
|
32
30
|
### Iterators
|
33
31
|
|
34
32
|
CADataFrame#each_column_name { |name| ... }
|
35
33
|
CADataFrame#each_column { |name, column| ... }
|
36
|
-
CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
|
34
|
+
CADataFrame#each_row(with: [Array|Hash|CArray]) { |row| ... }
|
37
35
|
CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
|
38
36
|
|
39
37
|
### Transformation
|
@@ -43,7 +41,6 @@ CADataFrame#add_suffix(suffix_string) -> CADataFrame
|
|
43
41
|
|
44
42
|
CADataFrame#transpose(column_names: )
|
45
43
|
|
46
|
-
|
47
44
|
### Conversion
|
48
45
|
|
49
46
|
CADataFrame#ca -> CADFArray (Reference Array)
|
@@ -67,9 +64,6 @@ ex) df.columns_to_hash("bbb",["aaa","ccc"])
|
|
67
64
|
|
68
65
|
{10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
|
69
66
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
67
|
CADataFrame
|
74
68
|
|
75
69
|
#append(name) { INSTANCE_CONTEXT } <- any carray
|
data/README.md
CHANGED
data/Rakefile
ADDED
data/carray-dataframe.gemspec
CHANGED
@@ -1,25 +1,27 @@
|
|
1
1
|
|
2
2
|
Gem::Specification::new do |s|
|
3
|
-
version = "1.
|
3
|
+
version = "1.1.1"
|
4
4
|
|
5
5
|
files = Dir.glob("**/*") - [
|
6
|
-
Dir.glob("carray*.gem"),
|
6
|
+
Dir.glob("carray-dataframe*.gem"),
|
7
|
+
Dir.glob("examples/**/*"),
|
8
|
+
Dir.glob("test/**/*"),
|
7
9
|
].flatten
|
8
10
|
|
9
11
|
s.platform = Gem::Platform::RUBY
|
10
12
|
s.name = "carray-dataframe"
|
11
|
-
s.summary = "
|
13
|
+
s.summary = "DataFrame in Ruby"
|
12
14
|
s.description = <<-HERE
|
13
|
-
|
15
|
+
DataFrame in Ruby
|
14
16
|
HERE
|
15
17
|
s.version = version
|
16
18
|
s.author = "Hiroki Motoyoshi"
|
17
19
|
s.email = ""
|
18
20
|
s.homepage = 'https://github.com/himotoyoshi/carray-dataframe'
|
21
|
+
s.license = 'MIT'
|
19
22
|
s.files = files
|
20
|
-
s.has_rdoc = false
|
21
23
|
s.required_ruby_version = ">= 1.8.1"
|
22
|
-
s.add_runtime_dependency 'carray', '~> 1.
|
24
|
+
s.add_runtime_dependency 'carray', '~> 1.5'
|
23
25
|
s.add_runtime_dependency 'axlsx', '~> 2.0'
|
24
26
|
s.add_runtime_dependency 'spreadsheet', '~> 1.1'
|
25
27
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require "carray-timeindex"
|
2
|
+
require "carray-dataframe/dataframe"
|
3
|
+
require "carray-dataframe/reference"
|
4
|
+
require "carray-dataframe/loc_accessor"
|
5
|
+
require "carray-dataframe/iloc_accessor"
|
6
|
+
require "carray-dataframe/arranger"
|
7
|
+
require "carray-dataframe/cadf_array"
|
8
|
+
require "carray-dataframe/group"
|
9
|
+
require "carray-dataframe/pivot"
|
10
|
+
require "carray-dataframe/join"
|
11
|
+
require "carray-dataframe/io"
|
12
|
+
require "carray-dataframe/converter"
|
13
|
+
require "carray-dataframe/to_html"
|
@@ -0,0 +1,209 @@
|
|
1
|
+
#############################################################
|
2
|
+
#
|
3
|
+
# ARRANGER
|
4
|
+
#
|
5
|
+
#############################################################
|
6
|
+
class CADataFrame
  # Evaluation context for "arrange" blocks.
  #
  # An Arranger wraps a data frame and exposes a small set of private helper
  # methods (type conversion, masking, column add/remove/rename, ...) that
  # edit the frame's `columns` and `column_names` in place. The helpers are
  # reachable from the block passed to #arrange because that block is run
  # with `instance_exec` on the Arranger.
  class Arranger

    # dataframe - the object whose `columns` / `column_names` are edited.
    def initialize(dataframe)
      @dataframe = dataframe
    end

    # Runs the block with self set to this Arranger and returns the wrapped
    # data frame. A block declaring exactly one parameter also receives the
    # data frame as its argument.
    def arrange(&block)
      case block.arity
      when 1
        instance_exec(@dataframe, &block)
      else
        instance_exec(&block)
      end
      return @dataframe
    end

    private

    # --- plain delegations to the wrapped data frame -----------------------

    def index
      return @dataframe.index
    end

    def column_names
      return @dataframe.column_names
    end

    def row_number
      return @dataframe.row_number
    end

    def row_index
      return @dataframe.row_index
    end

    # Delegates to the data frame's own `method`. Note that this definition
    # shadows Object#method for Arranger instances.
    def method(hash)
      @dataframe.method(hash)
    end

    # Replaces column `name` with the result of
    # CATimeIndex.from_time_array(column, "sec", format: format).
    def timeseries(name, format)
      time_column = @dataframe.columns[name.to_s]
      column = CATimeIndex.from_time_array(time_column, "sec", format: format)
      @dataframe.columns[name.to_s] = column
    end

    # Same as #timeseries, but the unit and the since/offset/format options
    # are forwarded to CATimeIndex.from_time_array as given.
    def timeindex(name, unit: "second", since: nil, offset: nil, format: nil)
      time_column = @dataframe.columns[name.to_s]
      column = CATimeIndex.from_time_array(time_column, unit, since: since, format: format, offset: offset)
      @dataframe.columns[name.to_s] = column
    end

    # Converts column `name` with `to_type(type)` and stores the result back.
    # When `mask` is supplied (anything other than the :novalue sentinel) the
    # converted column is additionally passed through `maskout!(mask)`.
    def type(type, name, mask = :novalue)
      key = name.to_s
      @dataframe.columns[key] = @dataframe.columns[key].to_type(type)
      if mask != :novalue
        # Fixed: the original referenced an undefined `options[:maskout]`
        # here, which raised as soon as a mask value was passed.
        @dataframe.columns[key].maskout!(mask)
      end
    end

    # Removes the named columns from both `columns` and `column_names`.
    # Returns self when called without names.
    def eliminate(*names)
      if names.empty?
        return self
      end
      names = names.map(&:to_s)
      # Iterate over a copy because column_names is modified inside the loop.
      @dataframe.column_names.clone.each do |name|
        if names.include?(name)
          @dataframe.columns.delete(name)
          @dataframe.column_names.delete(name)
        end
      end
    end

    alias drop eliminate

    # Forwards to `template` on the value of the first entry of `columns`.
    def template(*args, &block)
      return @dataframe.columns.first[1].template(*args, &block)
    end

    # Converts every named column to :double; raises for unknown names.
    def double(*names)
      names.flatten.map(&:to_s).each do |name|
        if @dataframe.column_names.include?(name)
          type(:double, name)
        else
          raise "Unknown column name '#{name}'"
        end
      end
    end

    # Converts every named column to :int; raises for unknown names.
    def int(*names)
      names.flatten.map(&:to_s).each do |name|
        if @dataframe.column_names.include?(name)
          type(:int, name)
        else
          raise "Unknown column name '#{name}'"
        end
      end
    end

    # Obsolete form of #mask: calls `maskout!(value)` on each named column
    # after emitting a deprecation warning.
    def maskout(value, *names)
      warn "maskout is obsolete. use mask(name, *values)"
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].maskout!(value)
      end
    end

    # Calls `maskout!` on column `name` once per entry of `values`.
    # A nil entry is passed as a literal nil; any other entry is splatted,
    # so an Array entry becomes several arguments of one maskout! call.
    def mask(name, *values)
      column = @dataframe.columns[name.to_s]
      values.each do |args|
        if args.nil?
          column.maskout!(nil)
        else
          column.maskout!(*args)
        end
      end
    end

    # Calls `unmask(value)` on each named column.
    def unmask(value, *names)
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].unmask(value)
      end
    end

    def col(name)
      return @dataframe.col(name)
    end

    # Stores `new_column` under `name` and pushes the name onto the end of
    # column_names. A falsy `new_column` is replaced by
    # `template(:object)` of the first column; anything that is not a CArray
    # is converted with `to_ca`.
    def append(name, new_column)
      if new_column
        # do nothing
      else
        new_column = @dataframe.columns.first[1].template(:object)
      end
      unless new_column.is_a?(CArray)
        new_column = new_column.to_ca
      end
      @dataframe.columns[name.to_s] = new_column
      @dataframe.column_names.push(name.to_s)
    end

    # Same as #append, but the name is inserted at the front of column_names.
    def prepend(name, new_column)
      if new_column
        # do nothing
      else
        new_column = @dataframe.columns.first[1].template(:object)
      end
      unless new_column.is_a?(CArray)
        new_column = new_column.to_ca
      end
      @dataframe.columns[name.to_s] = new_column
      @dataframe.column_names.unshift(name.to_s)
    end
    alias lead prepend

    # Renames column `name1` to `name2`, keeping its position in
    # column_names. Raises when `name1` is not a known column name.
    def rename(name1, name2)
      if idx = @dataframe.column_names.index(name1.to_s)
        @dataframe.column_names[idx] = name2.to_s
        column = @dataframe.columns[name1.to_s]
        @dataframe.columns.delete(name1.to_s)
        @dataframe.columns[name2.to_s] = column
      else
        raise "unknown column name #{name1}"
      end
    end

    def downcase
      @dataframe.downcase
    end

    def classify(name, scale, opt = {})
      return @dataframe.classify(name, scale, opt)
    end

    # Maps the values of a column through `mapper` and returns the result.
    #
    # name_or_column - a column name (String/Symbol), a CArray, or an Array
    #                  (converted with `to_ca`); anything else raises.
    # mapper         - Hash:   each value v becomes mapper[v]
    #                  CArray: result of mapper.project(column)
    #                  Array:  result of mapper.to_ca.project(column)
    #                  Any other mapper type yields nil.
    def map(mapper, name_or_column)
      case name_or_column
      when String, Symbol
        name = name_or_column
        column = @dataframe.columns[name.to_s]
      when CArray
        column = name_or_column
      when Array
        column = name_or_column.to_ca
      else
        raise "invalid argument"
      end
      case mapper
      when Hash
        # Fixed: the original looked values up in `hash` (i.e. Object#hash,
        # an Integer) instead of the mapper that was passed in.
        return column.convert(:object) {|v| mapper[v] }
      when CArray
        return mapper.project(column)
      when Array
        return mapper.to_ca.project(column)
      end
    end

    # Argument-less calls resolve to a column: first by column name, then
    # through the data frame's `__methods__` lookup. Everything else falls
    # through to the default behaviour.
    def method_missing(name, *args)
      if args.size == 0
        if @dataframe.column_names.include?(name.to_s)
          return @dataframe.columns[name.to_s]
        elsif @dataframe.__methods__.include?(name.to_s)
          return @dataframe.columns[@dataframe.__methods__[name.to_s]]
        end
      end
      super
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
#############################################################
|
2
|
+
#
|
3
|
+
# CADFArray
|
4
|
+
#
|
5
|
+
#############################################################
|
6
|
+
# Read-only, object-typed 2-D view (rows x columns) over a set of named
# columns. Element and mask access is routed to the per-column arrays held
# in @column_data, keyed by the names in @column_names.
#
# NOTE(review): the fetch_/store_/copy_/sync_/fill_ methods (and their
# mask_ counterparts) look like the callback hooks a CAObject subclass is
# expected to implement -- confirm against the carray documentation.
class CADFArray < CAObject # :nodoc:

  # column_names - ordered list of column names (defines the 2nd dimension)
  # column_data  - mapping of column name => column array
  # index:       - optional row index; defaults to an int sequence whose
  #                length is the size of the first entry of column_data
  def initialize(column_names, column_data, index: nil)
    @column_names = column_names
    @column_data = column_data
    if index
      @index = index
    else
      @index = CArray.int(column_data.first[1].size).seq
    end
    # rows = size of the first named column, cols = number of names
    dim = [@column_data[@column_names.first].size, @column_names.size]
    extend CArray::TableMethods
    super(:object, dim, :read_only=>true)
    __create_mask__
  end

  attr_reader :column_names, :index

  # Reads element [r, c] from the value array of the c-th named column.
  def fetch_index(idx)
    r, c = *idx
    name = @column_names[c]
    return @column_data[name].value[r]
  end

  # Writes `value` to row r of the c-th named column.
  def store_index(idx, value)
    r, c = *idx
    name = @column_names[c]
    return @column_data[name][r] = value
  end

  # Copies every column's values into the matching column of `data`.
  def copy_data(data)
    @column_names.each_with_index do |name, i|
      data[nil,i] = @column_data[name].value
    end
  end

  # Writes every column of `data` back into the matching column's values.
  def sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].value[] = data[nil,i]
    end
  end

  # Fills the values of every column with `value`.
  def fill_data(value)
    @column_names.each do |name|
      # Fixed: the original assigned `value` into the @column_data hash,
      # replacing the column object itself instead of filling its elements
      # (compare sync_data and mask_fill_data, which write through `[]=`).
      @column_data[name].value[] = value
    end
  end

  # Invokes __create_mask__ on every column.
  def create_mask
    @column_names.each do |name|
      @column_data[name].instance_eval{ __create_mask__ }
    end
  end

  # Mask element [r, c]; 0 when the column has no mask.
  def mask_fetch_index(idx)
    r, c = *idx
    name = @column_names[c]
    if @column_data[name].has_mask?
      return @column_data[name].mask[r]
    else
      return 0
    end
  end

  # Sets mask element [r, c].
  # NOTE(review): both branches perform the same assignment; the has_mask?
  # test currently has no effect on the outcome.
  def mask_store_index(idx, value)
    r, c = *idx
    name = @column_names[c]
    if @column_data[name].has_mask?
      return @column_data[name].mask[r] = value
    else
      @column_data[name].mask[r] = value
    end
  end

  # Copies the mask of every column that has one into `data`; columns
  # without a mask leave their slice of `data` untouched.
  def mask_copy_data(data)
    @column_names.each_with_index do |name, i|
      if @column_data[name].has_mask?
        data[nil,i] = @column_data[name].mask
      end
    end
  end

  # Writes every column of `data` into the matching column's mask.
  def mask_sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].mask[] = data[nil,i]
    end
  end

  # Fills the mask of every column with `value`.
  def mask_fill_data(value)
    @column_names.each do |name|
      @column_data[name].mask[] = value
    end
  end

  # Returns the superclass conversion result, extended with
  # CArray::TableMethods and carrying this view's column names.
  def to_ca
    obj = super
    obj.extend CArray::TableMethods
    obj.column_names = @column_names
    return obj
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
|
2
|
+
module CArray::TableMethods

  # Builds a CADataFrame from this table.
  #
  # Returns nil for an empty table (size == 0). Otherwise the receiver, the
  # `index:` option and the block are handed to CADataFrame.new. When the
  # table carries @header or @note, both are copied onto the new frame and
  # the frame gains two singleton methods:
  #   note         - reader for @note
  #   header(name) - @header[name.to_s] when a name is given,
  #                  otherwise the frame's @column_names
  def to_dataframe(index: nil, &block)
    return nil if size == 0

    frame = CADataFrame.new(self, index: index, &block)
    return frame unless @header || @note

    frame.instance_variable_set(:@header, @header)
    frame.instance_variable_set(:@note, @note)
    frame.singleton_class.class_eval do
      attr_reader :note

      def header(name = nil)
        name ? @header[name.to_s] : @column_names
      end
    end
    frame
  end
  alias to_df to_dataframe
end
|
28
|
+
|
29
|
+
class CADataFrame

  # Converts the frame to nested Ruby arrays.
  # With a row index present and `with_index` true, the index is included
  # as an extra leading column named "".
  def to_a(with_index: true)
    table =
      if @row_index && with_index
        CADFArray.new([""] + @column_names, @column_data.clone.update("" => index))
      else
        ca.to_ca
      end
    table.to_a
  end

  # Renders the frame through CADFArray#to_csv.
  #
  # io, rs, sep, fill and the block are forwarded to CADFArray#to_csv.
  # with_index  - include the row index as a leading "index" column when
  #               the frame has one
  # time_format - format applied to CATimeIndex columns; without it such
  #               columns are converted with to_s per element
  def to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block)
    include_index = @row_index && with_index
    names = include_index ? ["index"] + @column_names : @column_names
    data  = include_index ? @column_data.clone.update("index" => index) : @column_data.clone

    # Only values of existing keys are replaced, so iterating is safe.
    data.each do |key, col|
      next unless col.is_a?(CATimeIndex)
      data[key] =
        if time_format
          col.time.time_format(time_format)
        else
          col.time.convert(:object) { |t| t.to_s }
        end
    end

    CADFArray.new(names, data).to_csv(io, rs: rs, sep: sep, fill: fill, &block)
  end

  # Converts the frame to a Daru::DataFrame (daru is loaded lazily).
  # Each column is objectified and unmasked with nil before conversion.
  def to_daru
    require "daru"
    vectors = {}
    each_column_name do |name|
      vectors[name] = column(name).object.unmask(nil).to_a
    end
    options =
      if @row_index
        { index: @row_index.to_a, order: @column_names }
      else
        { order: @column_names }
      end
    Daru::DataFrame.new(vectors, **options)
  end

  # Writes the frame to an .xlsx file via axlsx (loaded lazily).
  #
  # filename       - output path handed to Axlsx::Package#serialize
  # sheet_name     - worksheet name
  # with_row_index - prepend the row index as a first, unnamed column
  # An optional block receives the worksheet before the file is written.
  def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
    require "axlsx"
    package = Axlsx::Package.new
    package.use_shared_strings = true
    sheet = package.workbook.add_worksheet(name: sheet_name)
    # Masked cells are written as the spreadsheet formula "=NA()".
    frame = to_df.objectify.unmask("=NA()")
    if with_row_index
      sheet.add_row([""] + column_names)
      frame.each_row_with_row_index(with: Array) { |row, idx| sheet.add_row([idx] + row) }
    else
      sheet.add_row(column_names)
      frame.each_row(with: Array) { |row| sheet.add_row(row) }
    end
    yield sheet if block_given?
    package.serialize(filename)
  end
end
|