carray-dataframe 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/API.txt +83 -0
- data/README.md +5 -0
- data/carray-dataframe.gemspec +25 -0
- data/examples/R/fit.rb +24 -0
- data/examples/R/iris.rb +9 -0
- data/examples/R/japan_area.rb +30 -0
- data/examples/R/kyaku.rb +22 -0
- data/examples/group_by.rb +78 -0
- data/examples/hist.rb +27 -0
- data/examples/iris.rb +29 -0
- data/examples/map.rb +23 -0
- data/examples/match.rb +21 -0
- data/examples/test.xlsx +0 -0
- data/examples/test1.rb +44 -0
- data/examples/test2.rb +14 -0
- data/examples/test3.db +0 -0
- data/examples/test3.rb +11 -0
- data/examples/test3.xlsx +0 -0
- data/examples/to_excel.rb +27 -0
- data/lib/R.rb +365 -0
- data/lib/carray/autoload/autoload_dataframe_dataframe.rb +26 -0
- data/lib/carray/dataframe/dataframe.rb +1640 -0
- metadata +106 -0
data/examples/test2.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# coding: utf-8
# Example: build a data frame whose column names are Japanese and read the
# columns back through accessor methods named after them.
require "carray"
require_relative "../lib/carray/dataframe/dataframe"

# Column name => values (keys read: temperature, humidity, pressure difference).
columns = {
  '温度'   => [4, 5, 6, 7],
  '湿度'   => [10, 20, 30, 40],
  '気圧差' => [100, 50, -30, -50]
}

# `double` is invoked inside the constructor block for two of the three
# columns; presumably it casts them to double precision -- confirm in
# dataframe.rb.
frame = CADataFrame.new(columns) do
  double :温度, :湿度
end

p frame

p frame.温度
p frame.湿度
|
data/examples/test3.db
ADDED
Binary file
|
data/examples/test3.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
# Example: assign UNDEF to a selection of rows, then export the frame to an
# Excel workbook and to a SQLite3 table.
require "carray"
require_relative "../lib/carray/dataframe/dataframe"

columns = {
  '番号' => [4, 5, 6, 7],
  'bbb'  => [10, 20, 30, 40],
  'ccc'  => [100, 50, -30, -50]
}
df = CADataFrame.new(columns)
p df

# Rows whose 番号 value is odd receive UNDEF (presumably this masks them;
# the later unmask(-999) call suggests so).
odd_rows = df.select { (番号 % 2).eq(1) }
odd_rows[] = UNDEF

df.to_xlsx("test3.xlsx", with_row_index: true)

# The SQLite export goes through to_df and has UNDEF replaced by -999 first.
exported = df.to_df.unmask(-999)
exported.to_sqlite3(database: "test3.db", table: "test")
|
11
|
+
|
data/examples/test3.xlsx
ADDED
Binary file
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Example: derive two columns from :a, write the frame to test.xlsx and add a
# scatter chart to the generated worksheet.
require "carray"

df = CADataFrame.new :a=>[1,2,3,4,5,6,7,8,9,10]

# :b is the square of :a; :c is an object column filled by seq("A", :succ).
df.arrange {
  append :b, a**2
  append :c, a.template(:object).seq("A", :succ)
}

# The block receives the worksheet, so the chart can reference the cells
# that to_xlsx has written (header in row 1, ten data rows in rows 2..11).
df.to_xlsx("test.xlsx") {|sheet|
  sheet.add_chart(Axlsx::ScatterChart, :start_at => [5,2], :end_at => [10, 15], :title => "Chart") do |chart|
    chart.add_series :xData => sheet["A2:A11"],
                     :yData => sheet["B2:B11"],
                     :labels => sheet["C2:C11"],
                     :title => 'bob'
  end
}

# Everything below __END__ is not executed. It is a reference snippet that
# writes the same frame with Axlsx directly. The last line used to read
# `endxl.serialize(...)` (a stray `end` fused onto the call), which would
# not run if the snippet were copied into a script.
__END__
xl = Axlsx::Package.new
xl.use_shared_strings = true
sheet = xl.workbook.add_worksheet(name: 'Example')
sheet.add_row(df.column_names)
df.each_row do |list|
  sheet.add_row(list)
end

xl.serialize("test.xlsx")
|
data/lib/R.rb
ADDED
@@ -0,0 +1,365 @@
|
|
1
|
+
require "rsruby"
|
2
|
+
require "rsruby/erobj"
|
3
|
+
require "carray"
|
4
|
+
|
5
|
+
class RSRuby

  # Translates a 'Ruby-style' R function name into the real R name.
  #
  # Rules, applied in this order:
  # * a single trailing underscore is dropped ("print_" -> "print"), but
  #   only when the name is longer than one character and does not end in
  #   two underscores;
  # * every "__" becomes "<-" ("names__" -> "names<-");
  # * every remaining "_" becomes "." ("as_data_frame" -> "as.data.frame").
  #
  # name:: String holding the Ruby-side method name.
  # Returns a new String; the argument is not modified.
  def RSRuby.convert_method_name(name)
    single_trailing_underscore =
      name.length > 1 && name.end_with?('_') && !name.end_with?('__')
    stem = single_trailing_underscore ? name[0..-2] : name
    stem.gsub(/__/, '<-').gsub(/_/, '.')
  end

end
|
19
|
+
|
20
|
+
# Convenience entry point for the R module.
#
# * With a block (and no expression) the block is run with +self+ set to
#   the R module, so R functions can be called without a receiver.
# * With an expression string, the call is forwarded to R.call(expr, hash).
#
# expr:: R source text, or nil when a block is given.
# hash:: passed through to R.call unchanged.
# Raises RuntimeError when both a block and an expression are supplied.
def R(expr = nil, hash = {}, &block)
  return R.call(expr, hash) unless block
  raise "don't give both of block and expresion" if expr
  R.instance_exec(&block)
end
|
31
|
+
|
32
|
+
# Statement-style companion of the top-level R() helper.
#
# * With a block (and no expression) the block is run with +self+ set to
#   the R module.
# * With a Hash, every pair is assigned on the R instance under the
#   stringified key; the Hash itself is returned.
# * Otherwise the call is forwarded to R.exec(expr, hash).
#
# NOTE(review): the Hash branch hands values to assign as they are, whereas
# R.exec runs them through R.__converter__ first -- confirm this difference
# is intended.
#
# Raises RuntimeError when both a block and an expression are supplied.
def R!(expr = nil, hash = {}, &block)
  if block
    raise "don't give both of block and expresion" if expr
    return R.instance_exec(&block)
  end
  return R.exec(expr, hash) unless expr.is_a?(Hash)
  expr.each { |name, value| R.instance.assign(name.to_s, value) }
end
|
47
|
+
|
48
|
+
# Thin facade over a single embedded RSRuby interpreter.
#
# Call R.run once before anything else; the other module methods use the
# interpreter stored by it. Unknown module methods are forwarded to the
# interpreter (see R.method_missing).
module R

  # Wrapper created by R::CONVERTER for R objects whose plain Ruby
  # conversion is a Hash or nil. Components can be read with obj[:name] or,
  # through method_missing, obj.name.
  class Receiver < ::ERObj

    # klass:: the value R's `class` function reported for the object
    # x::     the wrapped RObj
    def initialize(klass, x)
      @classname = klass
      # Runs `attributes(obj)` through the top-level R() helper.
      @attributes = R(%{ attributes(obj) }, :obj => x)
      super(x)
    end

    attr_reader :attributes, :robj

    # With a name: underscores are turned into dots, the component is
    # fetched with R's `$` operator and, if that raises RException, looked
    # up in the attributes instead. Without a name: the whole object is
    # converted with to_ruby. The result always goes through __converter__.
    def [](sym = nil)
      if sym
        name = sym.to_s
        name = name.gsub(/_/, '.')
        begin
          ret = @r['$'].call(@robj, name)
        rescue RException
          ret = @attributes[name]
        end
        return __converter__(ret)
      else
        return __converter__(to_ruby)
      end
    end

    # Recursively post-processes a converted value: Arrays become CArrays
    # (element-wise converted in place), Hash values are converted, RObj
    # instances go through R::CONVERTER, anything else is returned as is.
    def __converter__(arg)
      case arg
      when Array
        return arg.to_ca.map!{|v| __converter__(v) }
      when Hash
        new_hash = {}
        arg.each do |k,v|
          new_hash[k] = __converter__(v)
        end
        return new_hash
      when RObj
        return R::CONVERTER[arg]
      else
        return arg
      end
    end

    # Returns the component named "method". Note that this replaces
    # Object#method for Receiver instances.
    def method
      return self[:method]
    end

    def inspect
      return "<R:Receiver: class=#{@classname} \n" \
             "             attributes=#{@attributes.inspect} \n" \
             "             data=#{to_ruby} >"
    end

    # Argument-less unknown methods are treated as component lookups;
    # calls with arguments fall through to the default behaviour.
    def method_missing(sym, *args)
      if args.empty?
        return self[sym]
      else
        super
      end
    end

    # Defined explicitly so that implicit array conversion does not end up
    # in method_missing.
    def to_ary
      return [self.to_s]
    end

  end

  # R class name => converter applied by CONVERTER before any generic
  # handling.
  CONVERSION_TABLE = {
    "data.frame" => lambda{ |x| CADataFrame.from_R_data_frame(x) },
    "ts"         => lambda{ |x| R::TimeSeries.new(x) },
  }

  # Conversion proc installed into the interpreter's proc_table by R.run.
  #
  # For an RObj: a CONVERSION_TABLE entry for its R class wins; otherwise
  # the to_ruby value decides -- Numeric/String are returned directly, Hash
  # or nil yields a Receiver, and an Array becomes a CArray in which the
  # matching R NA value is masked (for the classes "character", "factor",
  # "integer" and "numeric"). Non-RObj arguments are returned unchanged.
  #
  # @r is resolved on the R module, so R.run must have been called.
  CONVERTER = lambda{|x|
    case x
    when RObj
      klass = @r.eval_R("class").call(x)
      if CONVERSION_TABLE.has_key?(klass)
        CONVERSION_TABLE[klass][x]
      else
        case val = x.to_ruby
        when Numeric, String
          val
        when Hash, NilClass
          Receiver.new(klass, x)
        when Array
          val = val.to_ca
          case klass
          when "character", "factor"
            val = val.maskout!(R.NA_character_)
          when "integer"
            val = val.maskout!(R.NA_integer_).int32
          when "numeric"
            val = val.maskout!(R.NA_real_).double
          end
          val
        else
          val
        end
      end
    else
      x
    end
  }

  # Starts the embedded interpreter unless it is already running.
  #
  # Sets LANG and LC_ALL to en_US.UTF-8, switches RSRuby to PROC_CONVERSION,
  # registers the converters, caches R's NA constants and registers a
  # finalizer that stops the interpreter. Always returns nil.
  def self.run
    return nil if @r
    ENV["LANG"] = "en_US.UTF-8"
    ENV["LC_ALL"] = "en_US.UTF-8"
    @r = RSRuby.instance
    RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
    @r.class_table['data.frame'] = lambda{|x| ERObj.new(x) }
    @r.class_table['matrix'] = lambda{|x| ERObj.new(x) }
    @r.proc_table[lambda{|x| true }] = CONVERTER
    @NA_integer_ = R(%{ NA_integer_ })
    @NA_real_ = R(%{ NA_real_ })
    @NA_character_ = R(%{ NA_character_ })
    # Go through stop rather than calling @r.shutdown directly: stop is a
    # no-op once the interpreter is gone, so the finalizer is safe after an
    # explicit R.stop and after repeated run/stop cycles.
    ObjectSpace.define_finalizer(self, proc{ stop })
    return nil
  end

  # Shuts the interpreter down and forgets it. Does nothing when the
  # interpreter is not running (never started, or already stopped).
  # Returns nil.
  def self.stop
    return nil unless @r
    @r.shutdown
    @r = nil
  end

  class << self
    # R's NA constants as fetched by R.run (nil before R.run).
    attr_reader :NA_integer_, :NA_real_, :NA_character_
  end

  # The RSRuby interpreter stored by R.run, or nil.
  def self.instance
    return @r
  end

  # Assigns every hash entry as an R variable (values converted with
  # __converter__), then evaluates expr and returns the result.
  def self.exec(expr, hash = {})
    hash.each do |name, value|
      @r.assign(name.to_s, __converter__(value))
    end
    return @r.eval_R(expr)
  end

  # Evaluates expr as the body of an anonymous R function whose parameters
  # are the hash keys, called with the converted hash values. A dummy first
  # parameter "DU33Y" (bound to 0) is always present.
  def self.call(expr, hash = {})
    names = ["DU33Y"]
    args  = [0]
    hash.each do |name, value|
      names.push(name.to_s)
      args.push(__converter__(value))
    end
    expr = "function (#{names.join(",")}) {" + expr + "}"
    return @r.eval_R(expr).call(*args)
  end

  # Prepares a Ruby value for R: Symbols become Strings, CArray and
  # CADataFrame are converted with their as_r method, Arrays and Hash
  # values are converted recursively, everything else passes through.
  def self.__converter__(arg)
    case arg
    when Symbol
      return arg.to_s
    when CArray
      return __converter__(arg.as_r)
    when CADataFrame
      return arg.as_r
    when Array
      return arg.map{|v| __converter__(v) }
    when Hash
      new_hash = {}
      arg.each do |k,v|
        new_hash[k] = __converter__(v)
      end
      return new_hash
    else
      return arg
    end
  end

  # Forwards unknown module methods to the interpreter:
  # * R.name!      -- fetches `name` from the interpreter and calls it
  #                   without arguments;
  # * R.name = v   -- assigns the converted value to the R variable `name`;
  # * anything else is sent to the interpreter with converted arguments.
  def self.method_missing(sym, *args)
    if args.empty? and sym.to_s[-1] == "!"
      return @r.send(sym.to_s[0..-2].intern).call()
    elsif args.size == 1 and sym.to_s[-1] == "="
      return @r.assign(sym.to_s[0..-2], __converter__(args[0]))
    else
      return @r.send(sym, *args.map{|v| __converter__(v)})
    end
  end

end
|
234
|
+
|
235
|
+
class CArray

  # Names the R vector type that matches this array's contents.
  #
  # Returns "integer" or "numeric" for integer / float arrays. For object
  # arrays the unmasked elements are inspected and the result is "integer",
  # "numeric", "character" or "unknown". Arrays of any other data type
  # yield nil.
  #
  # The former `is_a?(CArray)` guard was removed: inside a CArray instance
  # method it is always true, so its else branch could never run.
  def guess_column_type_for_R
    if integer?
      "integer"
    elsif float?
      "numeric"
    elsif object?
      notmasked = self[:is_not_masked].to_ca
      if notmasked.convert(:boolean){|v| v.is_a?(Integer) }.all_equal?(1)
        "integer"
      elsif notmasked.convert(:boolean){|v| v.is_a?(Numeric) }.all_equal?(1)
        "numeric"
      elsif notmasked.convert(:boolean){|v| v.is_a?(String) }.all_equal?(1)
        "character"
      else
        "unknown"
      end
    end
  end

  # Converts the array into a value that can be handed to R.
  #
  # Masked elements are first replaced by the R NA constant that matches
  # guess_column_type_for_R (NA_character_ when the type is neither
  # "integer" nor "numeric"). Rank-2 arrays are turned into an R matrix
  # with dim0 rows; every other rank is returned as a (nested) Ruby Array.
  #
  # The matrix is built with byrow = TRUE: R's matrix() fills column by
  # column by default, while the flattened data is assumed to be in
  # row-major order (element [i, j+1] directly after [i, j]), so without
  # byrow the elements would land in the wrong cells.
  def as_r
    if has_mask?
      case guess_column_type_for_R
      when "integer"
        out = unmask_copy(R.NA_integer_)
      when "numeric"
        out = unmask_copy(R.NA_real_)
      else
        out = unmask_copy(R.NA_character_)
      end
    else
      out = self
    end
    return out.to_a unless rank == 2

    # Saved before the begin block so that ensure always restores a real
    # mode value. NO_CONVERSION keeps the matrix as an R object.
    saved_mode = RSRuby.get_default_mode
    begin
      RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
      return R.matrix(out.flatten.to_a, :nrow => dim0, :byrow => true)
    ensure
      RSRuby.set_default_mode(saved_mode)
    end
  end

end
|
289
|
+
|
290
|
+
class CADataFrame

  # Builds a CADataFrame from an R data.frame object.
  #
  # Every column is fetched with R's `$` operator and stored as a CArray
  # (a scalar value is wrapped in a one-element array); nil elements are
  # masked. The R row names, when present, become the row index.
  #
  # Side effects: switches RSRuby's default mode to PROC_CONVERSION and
  # adds R::CONVERTER to the interpreter's proc_table.
  # NOTE(review): a fresh lambda key is added to proc_table on every call,
  # in addition to the one R.run installs -- check whether that table is
  # meant to grow.
  def self.from_R_data_frame(obj)
    r = R.instance
    RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
    r.proc_table[lambda{|x| true }] = R::CONVERTER
    # colnames may come back as a single String (one-column frame) rather
    # than an array-like object; Kernel#Array handles both, whereas calling
    # to_a directly fails on a String.
    column_names = Array(r.colnames(obj)).flatten
    row_names = r.attr(obj, 'row.names')
    columns = {}
    column_names.each do |name|
      value = r['$'].call(obj, name.to_s)
      case value
      when CArray
        columns[name] = value
      when Array
        columns[name] = value.to_ca
      else
        columns[name] = [value].to_ca
      end
    end
    column_names.each do |name|
      column = columns[name]
      column.maskout!(nil)
    end
    return CADataFrame.new(columns, row_index: row_names ? row_names.to_ca : nil)
  end

  # Converts the frame into an R data.frame object.
  #
  # Masked elements of each column are replaced by the R NA constant that
  # matches the column's guessed type (NA_character_ when the type is
  # neither "integer" nor "numeric"). The as.data.frame call is made under
  # NO_CONVERSION so the result stays an R object; the previous mode is
  # restored afterwards.
  def as_r
    r = R.instance
    new_columns = {}
    @column_names.each do |name|
      column = @columns[name]
      if column.has_mask?
        case column.guess_column_type_for_R
        when "integer"
          column = column.unmask_copy(R.NA_integer_)
        when "numeric"
          column = column.unmask_copy(R.NA_real_)
        else
          column = column.unmask_copy(R.NA_character_)
        end
      end
      new_columns[name] = R.__converter__(column.to_a)
    end
    # The mode is saved and restored only around the call that needs it.
    # Previously a method-level ensure restored `mode` even when the loop
    # above had failed before `mode` was assigned, i.e. it restored nil.
    saved_mode = RSRuby.get_default_mode
    begin
      RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
      return r.as_data_frame(:x => new_columns)
    ensure
      RSRuby.set_default_mode(saved_mode)
    end
  end

end
|
344
|
+
|
345
|
+
# Wrapper used by R::CONVERSION_TABLE for R objects of class "ts".
# Each accessor calls the R function of the same name on the wrapper itself
# (the call is routed through the R module).
class R::TimeSeries < ERObj

  # Result of R's start() for this series.
  def start
    R.start(self)
  end

  # Result of R's end() for this series.
  def end
    R.end(self)
  end

  # Result of R's frequency() for this series.
  def frequency
    R.frequency(self)
  end

  # Result of R's length() for this series.
  def length
    R.length(self)
  end

end
|
364
|
+
|
365
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
# Registers `to_df` on CA::TableMethods via autoload_method (a helper defined
# outside this file); presumably the named library is loaded on first use.
module CA::TableMethods
  autoload_method("to_df", "carray/dataframe/dataframe")
end
|
5
|
+
|
6
|
+
# Top-level constants whose first reference loads the data frame library.
[:CADataFrame, :DataFrame, :RSReceiver].each do |const_name|
  autoload const_name, "carray/dataframe/dataframe"
end
|
10
|
+
|
11
|
+
# Registers two RSRuby methods via autoload_method (a helper defined outside
# this file). The method names are passed exactly as spelled here.
class RSRuby
  autoload_method("setup", "carray/dataframe/dataframe")
  autoload_method("recieve", "carray/dataframe/dataframe")
end
|
15
|
+
|
16
|
+
# Registers `to_df` on Daru::DataFrame via autoload_method (a helper defined
# outside this file).
module Daru
  class DataFrame
    autoload_method("to_df", "carray/dataframe/dataframe")
  end
end
|
21
|
+
|
22
|
+
# Registers `to_df` on SQLite3::Database via autoload_method (a helper
# defined outside this file).
module SQLite3
  class Database
    autoload_method("to_df", "carray/dataframe/dataframe")
  end
end
|