carray-dataframe 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/API.txt +1 -7
- data/README.md +3 -1
- data/Rakefile +11 -0
- data/carray-dataframe.gemspec +8 -6
- data/lib/carray-dataframe.rb +13 -0
- data/lib/carray-dataframe/arranger.rb +209 -0
- data/lib/carray-dataframe/cadf_array.rb +106 -0
- data/lib/carray-dataframe/converter.rb +97 -0
- data/lib/carray-dataframe/dataframe.rb +1279 -0
- data/lib/carray-dataframe/group.rb +199 -0
- data/lib/carray-dataframe/iloc_accessor.rb +62 -0
- data/lib/carray-dataframe/io.rb +96 -0
- data/lib/carray-dataframe/join.rb +283 -0
- data/lib/carray-dataframe/loc_accessor.rb +145 -0
- data/lib/carray-dataframe/pivot.rb +54 -0
- data/lib/carray-dataframe/reference.rb +142 -0
- data/lib/carray-dataframe/to_html.rb +102 -0
- metadata +23 -28
- data/examples/R/fit.rb +0 -24
- data/examples/R/iris.rb +0 -9
- data/examples/R/japan_area.rb +0 -30
- data/examples/R/kyaku.rb +0 -22
- data/examples/group_by.rb +0 -78
- data/examples/hist.rb +0 -27
- data/examples/iris.rb +0 -29
- data/examples/map.rb +0 -23
- data/examples/match.rb +0 -21
- data/examples/test.xlsx +0 -0
- data/examples/test1.rb +0 -44
- data/examples/test2.rb +0 -14
- data/examples/test3.db +0 -0
- data/examples/test3.rb +0 -11
- data/examples/test3.xlsx +0 -0
- data/examples/to_excel.rb +0 -27
- data/lib/R.rb +0 -365
- data/lib/carray/autoload/autoload_dataframe_dataframe.rb +0 -26
- data/lib/carray/dataframe/dataframe.rb +0 -1640
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c0561562238eb969944e6e7977f3cdc4edbe3370187ea1f7ba01a2f6af60bf40
|
4
|
+
data.tar.gz: 738a716a1b5055fefb8b182dd26a3a9ac559563072b02df151bf16eab652f245
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 320b6326fb40b111bc601c38d0e2a5a917434dedfd96f9830c4d24dd66df507d7450d30027829f2a832c0673c69bbe20cb7bb5201adad4cc41728505ecbea9d3
|
7
|
+
data.tar.gz: 4bd51f6c826a561dbb108aa57988520ca870fc02d8615babaa65644ca4869dd781b48c6015b19ba5235943fa7d793d0789ced918ae3675b9f4ae3aef0eb0aaf7
|
data/API.txt
CHANGED
@@ -24,16 +24,14 @@ df["AAA"] => 1 dimensional CArray
|
|
24
24
|
df[["AAA"]].to_ca => 2 dimensional CArray with column_names
|
25
25
|
df[["AAA","BBB"]].to_ca => 2 dimensional CArray with column_names
|
26
26
|
|
27
|
-
|
28
27
|
df[dfmask]
|
29
28
|
return new detached CADataFrame masked where dfmask's value equal 1
|
30
29
|
|
31
|
-
|
32
30
|
### Iterators
|
33
31
|
|
34
32
|
CADataFrame#each_column_name { |name| ... }
|
35
33
|
CADataFrame#each_column { |name, column| ... }
|
36
|
-
CADataFrame#each_row(with: [Array|Hash]) { |row| ... }
|
34
|
+
CADataFrame#each_row(with: [Array|Hash|CArray]) { |row| ... }
|
37
35
|
CADataFrame#each_row_with_row_index(with: [Array|Hash]) { |row, idx| ... }
|
38
36
|
|
39
37
|
### Transformation
|
@@ -43,7 +41,6 @@ CADataFrame#add_suffix(suffix_string) -> CADataFrame
|
|
43
41
|
|
44
42
|
CADataFrame#transpose(column_names: )
|
45
43
|
|
46
|
-
|
47
44
|
### Conversion
|
48
45
|
|
49
46
|
CADataFrame#ca -> CADFArray (Reference Array)
|
@@ -67,9 +64,6 @@ ex) df.columns_to_hash("bbb",["aaa","ccc"])
|
|
67
64
|
|
68
65
|
{10=>[4, 100], 20=>[5, 50], 30=>[6, -30], 40=>[7, -50]}
|
69
66
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
67
|
CADataFrame
|
74
68
|
|
75
69
|
#append(name) { INSTANCE_CONTEXT } <- any carray
|
data/README.md
CHANGED
data/Rakefile
ADDED
data/carray-dataframe.gemspec
CHANGED
@@ -1,25 +1,27 @@
|
|
1
1
|
|
2
2
|
Gem::Specification::new do |s|
|
3
|
-
version = "1.
|
3
|
+
version = "1.1.1"
|
4
4
|
|
5
5
|
files = Dir.glob("**/*") - [
|
6
|
-
Dir.glob("carray*.gem"),
|
6
|
+
Dir.glob("carray-dataframe*.gem"),
|
7
|
+
Dir.glob("examples/**/*"),
|
8
|
+
Dir.glob("test/**/*"),
|
7
9
|
].flatten
|
8
10
|
|
9
11
|
s.platform = Gem::Platform::RUBY
|
10
12
|
s.name = "carray-dataframe"
|
11
|
-
s.summary = "
|
13
|
+
s.summary = "DataFrame in Ruby"
|
12
14
|
s.description = <<-HERE
|
13
|
-
|
15
|
+
DataFrame in Ruby
|
14
16
|
HERE
|
15
17
|
s.version = version
|
16
18
|
s.author = "Hiroki Motoyoshi"
|
17
19
|
s.email = ""
|
18
20
|
s.homepage = 'https://github.com/himotoyoshi/carray-dataframe'
|
21
|
+
s.license = 'MIT'
|
19
22
|
s.files = files
|
20
|
-
s.has_rdoc = false
|
21
23
|
s.required_ruby_version = ">= 1.8.1"
|
22
|
-
s.add_runtime_dependency 'carray', '~> 1.
|
24
|
+
s.add_runtime_dependency 'carray', '~> 1.5'
|
23
25
|
s.add_runtime_dependency 'axlsx', '~> 2.0'
|
24
26
|
s.add_runtime_dependency 'spreadsheet', '~> 1.1'
|
25
27
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require "carray-timeindex"
|
2
|
+
require "carray-dataframe/dataframe"
|
3
|
+
require "carray-dataframe/reference"
|
4
|
+
require "carray-dataframe/loc_accessor"
|
5
|
+
require "carray-dataframe/iloc_accessor"
|
6
|
+
require "carray-dataframe/arranger"
|
7
|
+
require "carray-dataframe/cadf_array"
|
8
|
+
require "carray-dataframe/group"
|
9
|
+
require "carray-dataframe/pivot"
|
10
|
+
require "carray-dataframe/join"
|
11
|
+
require "carray-dataframe/io"
|
12
|
+
require "carray-dataframe/converter"
|
13
|
+
require "carray-dataframe/to_html"
|
@@ -0,0 +1,209 @@
|
|
1
|
+
#############################################################
#
#  ARRANGER
#
#############################################################
class CADataFrame
  # Evaluation context for the block given to an "arrange" operation.
  #
  # The block is run with +instance_exec+ on an Arranger, so every private
  # method below is callable without a receiver inside that block, and a bare
  # identifier that matches a column name resolves to that column through
  # +method_missing+.
  class Arranger

    # @param dataframe [CADataFrame] the frame whose +columns+ and
    #   +column_names+ are modified in place by the arrangement methods.
    def initialize(dataframe)
      @dataframe = dataframe
    end

    # Runs +block+ in the context of this arranger.
    #
    # A block declared with exactly one parameter also receives the data frame
    # as its argument; any other arity is executed without arguments.
    #
    # @return [CADataFrame] the (possibly modified) data frame.
    def arrange(&block)
      case block.arity
      when 1
        instance_exec(@dataframe, &block)
      else
        instance_exec(&block)
      end
      return @dataframe
    end

    private

    # @return the data frame's +index+.
    def index
      return @dataframe.index
    end

    # @return the data frame's list of column names.
    def column_names
      return @dataframe.column_names
    end

    # @return the data frame's +row_number+.
    def row_number
      return @dataframe.row_number
    end

    # @return the data frame's +row_index+.
    def row_index
      return @dataframe.row_index
    end

    # Forwards to the data frame's own +method+.
    # NOTE: inside the arranger this shadows Object#method.
    def method(hash)
      @dataframe.method(hash)
    end

    # Replaces column +name+ with a CATimeIndex built from it using "sec"
    # units and the given time +format+.
    def timeseries(name, format)
      key = name.to_s
      @dataframe.columns[key] =
        CATimeIndex.from_time_array(@dataframe.columns[key], "sec", format: format)
    end

    # Replaces column +name+ with a CATimeIndex built from it.
    # All keyword arguments are passed through to CATimeIndex.from_time_array.
    def timeindex(name, unit: "second", since: nil, offset: nil, format: nil)
      key = name.to_s
      @dataframe.columns[key] =
        CATimeIndex.from_time_array(@dataframe.columns[key], unit,
                                    since: since, format: format, offset: offset)
    end

    # Converts column +name+ to data type +type+ and, when +mask+ is given,
    # masks out the elements equal to +mask+.
    #
    # @param type the target type handed to the column's +to_type+.
    # @param name [String, Symbol] the column name.
    # @param mask value to mask out; omit it to skip masking.
    def type(type, name, mask = :novalue)
      key = name.to_s
      @dataframe.columns[key] = @dataframe.columns[key].to_type(type)
      # :novalue is the "not given" sentinel, so nil stays usable as a mask value.
      # (The original referenced an undefined `options` hash here.)
      @dataframe.columns[key].maskout!(mask) if mask != :novalue
    end

    # Removes the named columns from the data frame. Names that do not match
    # any column are ignored.
    #
    # @return [self] when called without names.
    def eliminate(*names)
      return self if names.empty?
      names = names.map(&:to_s)
      # Iterate over a copy because column_names is mutated inside the loop.
      @dataframe.column_names.clone.each do |name|
        if names.include?(name)
          @dataframe.columns.delete(name)
          @dataframe.column_names.delete(name)
        end
      end
    end

    alias drop eliminate

    # Calls +template+ on the first column of the data frame, forwarding all
    # arguments and the block.
    def template(*args, &block)
      return @dataframe.columns.first[1].template(*args, &block)
    end

    # Converts the named columns to :double.
    # @raise [RuntimeError] on the first name that is not a known column.
    def double(*names)
      __retype_columns__(:double, names)
    end

    # Converts the named columns to :int.
    # @raise [RuntimeError] on the first name that is not a known column.
    def int(*names)
      __retype_columns__(:int, names)
    end

    # Obsolete: masks out +value+ in each of the named columns.
    # Emits a warning pointing to #mask.
    def maskout(value, *names)
      warn "maskout is obsolete. use mask(name, *values)"
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].maskout!(value)
      end
    end

    # Masks out each of +values+ in column +name+.
    # A nil entry masks nil; any other entry is splatted into +maskout!+.
    def mask(name, *values)
      column = @dataframe.columns[name.to_s]
      values.each do |args|
        if args.nil?
          column.maskout!(nil)
        else
          column.maskout!(*args)
        end
      end
    end

    # Calls +unmask(value)+ on each of the named columns.
    def unmask(value, *names)
      names.flatten.map(&:to_s).each do |name|
        @dataframe.columns[name].unmask(value)
      end
    end

    # @return the result of the data frame's +col(name)+.
    def col(name)
      return @dataframe.col(name)
    end

    # Stores +new_column+ under +name+ and adds the name at the end of the
    # column order. A nil/false +new_column+ is replaced by an :object
    # template of the first column.
    def append(name, new_column)
      @dataframe.columns[name.to_s] = __coerce_column__(new_column)
      @dataframe.column_names.push(name.to_s)
    end

    # Stores +new_column+ under +name+ and adds the name at the front of the
    # column order. A nil/false +new_column+ is replaced by an :object
    # template of the first column.
    def prepend(name, new_column)
      @dataframe.columns[name.to_s] = __coerce_column__(new_column)
      @dataframe.column_names.unshift(name.to_s)
    end
    alias lead prepend

    # Renames column +name1+ to +name2+, keeping its position in the order.
    # @raise [RuntimeError] if +name1+ is not a known column.
    def rename(name1, name2)
      old_key = name1.to_s
      new_key = name2.to_s
      idx = @dataframe.column_names.index(old_key)
      raise "unknown column name #{name1}" unless idx
      @dataframe.column_names[idx] = new_key
      @dataframe.columns[new_key] = @dataframe.columns.delete(old_key)
    end

    # Forwards to the data frame's +downcase+.
    def downcase
      @dataframe.downcase
    end

    # Forwards to the data frame's +classify+.
    def classify(name, scale, opt = {})
      return @dataframe.classify(name, scale, opt)
    end

    # Maps the values of a column through +mapper+.
    #
    # @param mapper [Hash, CArray, Array] a Hash is applied element-wise;
    #   a CArray/Array is applied via +project+.
    # @param name_or_column [String, Symbol, CArray, Array] a column name or
    #   the data itself.
    # @return the mapped column, or nil for any other kind of +mapper+.
    # @raise [RuntimeError] "invalid argument" for an unsupported
    #   +name_or_column+.
    def map(mapper, name_or_column)
      column =
        case name_or_column
        when String, Symbol
          @dataframe.columns[name_or_column.to_s]
        when CArray
          name_or_column
        when Array
          name_or_column.to_ca
        else
          raise "invalid argument"
        end
      case mapper
      when Hash
        # Look values up in the mapper itself; the original indexed
        # Object#hash (an Integer) by mistake.
        return column.convert(:object) { |v| mapper[v] }
      when CArray
        return mapper.project(column)
      when Array
        return mapper.to_ca.project(column)
      end
    end

    # Resolves a zero-argument call to the column of that name, or to the
    # column registered under that name in the data frame's +__methods__+.
    def method_missing(name, *args)
      if args.empty?
        key = name.to_s
        if @dataframe.column_names.include?(key)
          return @dataframe.columns[key]
        elsif @dataframe.__methods__.include?(key)
          return @dataframe.columns[@dataframe.__methods__[key]]
        end
      end
      super
    end

    # Keeps respond_to? consistent with the names method_missing resolves.
    def respond_to_missing?(name, include_private = false)
      key = name.to_s
      @dataframe.column_names.include?(key) ||
        @dataframe.__methods__.include?(key) ||
        super
    end

    # Helper for #double / #int: converts each named column to +type+,
    # raising on the first unknown name (earlier names stay converted).
    def __retype_columns__(type, names)
      names.flatten.map(&:to_s).each do |name|
        unless @dataframe.column_names.include?(name)
          raise "Unknown column name '#{name}'"
        end
        type(type, name)
      end
    end

    # Helper for #append / #prepend: supplies a default column for a
    # nil/false value and converts non-CArray data with +to_ca+.
    def __coerce_column__(new_column)
      new_column ||= @dataframe.columns.first[1].template(:object)
      new_column = new_column.to_ca unless new_column.is_a?(CArray)
      new_column
    end
  end
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
#############################################################
#
#  CADFArray
#
#############################################################
# Two-dimensional object-typed view (rows x columns) over a set of named
# column arrays. Element access is routed to the column selected by the
# second index.
#
# NOTE(review): fetch_index, store_index, copy_data, sync_data, fill_data and
# the mask_* counterparts look like the callback protocol of CAObject; that
# contract is defined outside this file -- confirm against carray.
class CADFArray < CAObject # :nodoc:

  # @param column_names [Array<String>] column order of the view.
  # @param column_data [Hash] maps column name to its column array.
  # @param index optional row index; defaults to an integer sequence sized
  #   like the first entry of +column_data+.
  def initialize(column_names, column_data, index: nil)
    @column_names = column_names
    @column_data  = column_data
    if index
      @index = index
    else
      @index = CArray.int(column_data.first[1].size).seq
    end
    # Shape is [rows of the first named column, number of columns].
    dim = [@column_data[@column_names.first].size, @column_names.size]
    extend CArray::TableMethods
    super(:object, dim, :read_only=>true)
    __create_mask__
  end

  attr_reader :column_names, :index

  # Reads the element at +idx+ = [row, column] from the column's +value+.
  def fetch_index(idx)
    r, c = *idx
    name = @column_names[c]
    return @column_data[name].value[r]
  end

  # Writes +value+ at +idx+ = [row, column] into the underlying column.
  def store_index(idx, value)
    r, c = *idx
    name = @column_names[c]
    return @column_data[name][r] = value
  end

  # Copies every column's values into column i of +data+.
  def copy_data(data)
    @column_names.each_with_index do |name, i|
      data[nil,i] = @column_data[name].value
    end
  end

  # Writes column i of +data+ back into each underlying column's values.
  def sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].value[] = data[nil,i]
    end
  end

  # Fills every underlying column with +value+.
  def fill_data(value)
    @column_names.each do |name|
      # Write into the column, as sync_data and mask_fill_data do. The
      # original assigned to @column_data[name], which replaced the column
      # object in the hash with the fill value itself.
      @column_data[name].value[] = value
    end
  end

  # Invokes +__create_mask__+ on every underlying column.
  def create_mask
    @column_names.each do |name|
      @column_data[name].instance_eval{ __create_mask__ }
    end
  end

  # Reads the mask element at +idx+ = [row, column]; 0 when the column has
  # no mask.
  def mask_fetch_index(idx)
    r, c = *idx
    name = @column_names[c]
    if @column_data[name].has_mask?
      return @column_data[name].mask[r]
    else
      return 0
    end
  end

  # Writes the mask element at +idx+ = [row, column].
  def mask_store_index(idx, value)
    r, c = *idx
    name = @column_names[c]
    # The original branched on has_mask? but performed this same assignment
    # in both branches, so the branch is dropped without changing behavior.
    return @column_data[name].mask[r] = value
  end

  # Copies the mask of every column that has one into column i of +data+.
  # Columns without a mask leave +data+ untouched.
  def mask_copy_data(data)
    @column_names.each_with_index do |name, i|
      if @column_data[name].has_mask?
        data[nil,i] = @column_data[name].mask
      end
    end
  end

  # Writes column i of +data+ into each underlying column's mask.
  def mask_sync_data(data)
    @column_names.each_with_index do |name, i|
      @column_data[name].mask[] = data[nil,i]
    end
  end

  # Fills every underlying column's mask with +value+.
  def mask_fill_data(value)
    @column_names.each do |name|
      @column_data[name].mask[] = value
    end
  end

  # Converts to a plain array via the superclass, then attaches
  # CArray::TableMethods and this view's column names to the result.
  def to_ca
    obj = super
    obj.extend CArray::TableMethods
    obj.column_names = @column_names
    return obj
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
|
2
|
+
module CArray::TableMethods

  # Builds a CADataFrame from this table.
  #
  # Returns nil for an empty table. When the table carries @header or @note,
  # both are copied onto the new frame, which then also gains a +note+ reader
  # and a +header+ method on its singleton class.
  #
  # @param index passed through to CADataFrame.new.
  # @param block passed through to CADataFrame.new.
  def to_dataframe(index: nil, &block)
    return nil if self.size == 0

    frame = CADataFrame.new(self, index: index, &block)
    if @header || @note
      frame.instance_variable_set(:@header, @header)
      frame.instance_variable_set(:@note, @note)
      class << frame
        attr_reader :note

        # With a name: the @header entry stored under that name.
        # Without a name: the frame's @column_names.
        def header(name = nil)
          return name ? @header[name.to_s] : @column_names
        end
      end
    end
    frame
  end
  alias to_df to_dataframe
end
|
28
|
+
|
29
|
+
class CADataFrame

  # Converts the frame to a nested Ruby Array.
  #
  # When the frame has a row index and +with_index+ is true, the index is
  # included as an extra column named "".
  def to_a(with_index: true)
    table =
      if @row_index && with_index
        names = [""] + @column_names
        CADFArray.new(names, @column_data.clone.update("" => index))
      else
        ca.to_ca
      end
    table.to_a
  end

  # Writes the frame as CSV through CADFArray#to_csv.
  #
  # When the frame has a row index and +with_index+ is true, the index is
  # emitted as an extra column named "index". CATimeIndex columns are
  # rendered with +time_format+ when given, otherwise with each time's to_s.
  # +io+, +rs+, +sep+, +fill+ and the block are forwarded to CADFArray#to_csv.
  def to_csv(io = "", rs: $/, sep: ",", fill: "", with_index: true, time_format: nil, &block)
    include_index = @row_index && with_index
    names   = include_index ? ["index"] + @column_names : @column_names
    # Work on a copy of the hash so the substitutions below stay local.
    columns = @column_data.clone
    columns.update("index" => index) if include_index

    columns.each do |key, col|
      next unless col.is_a?(CATimeIndex)
      columns[key] =
        if time_format
          col.time.time_format(time_format)
        else
          col.time.convert(:object){|t| t.to_s}
        end
    end

    CADFArray.new(names, columns).to_csv(io, rs: rs, sep: sep, fill: fill, &block)
  end

  # Converts the frame to a Daru::DataFrame (the daru gem is loaded lazily).
  # Each column is objectified and unmasked with nil before conversion; the
  # row index is passed along when present.
  def to_daru
    require "daru"
    data = {}
    each_column_name { |name| data[name] = column(name).object.unmask(nil).to_a }
    return Daru::DataFrame.new(data, order: @column_names) unless @row_index
    Daru::DataFrame.new(data, index: @row_index.to_a, order: @column_names)
  end

  # Writes the frame to an .xlsx file using axlsx (loaded lazily).
  #
  # Masked elements are written as "=NA()". With +with_row_index+ each row is
  # prefixed by its row index under an empty header cell. When a block is
  # given it receives the worksheet before the file is serialized.
  def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: false, &block)
    require "axlsx"
    package = Axlsx::Package.new
    package.use_shared_strings = true
    sheet = package.workbook.add_worksheet(name: sheet_name)
    frame = self.to_df.objectify.unmask("=NA()")

    if with_row_index
      sheet.add_row([""] + column_names)
      frame.each_row_with_row_index(with: Array) { |cells, i| sheet.add_row([i] + cells) }
    else
      sheet.add_row(column_names)
      frame.each_row(with: Array) { |cells| sheet.add_row(cells) }
    end

    yield sheet if block_given?
    package.serialize(filename)
  end
end
|