carray-dataframe 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/API.txt +83 -0
- data/README.md +5 -0
- data/carray-dataframe.gemspec +25 -0
- data/examples/R/fit.rb +24 -0
- data/examples/R/iris.rb +9 -0
- data/examples/R/japan_area.rb +30 -0
- data/examples/R/kyaku.rb +22 -0
- data/examples/group_by.rb +78 -0
- data/examples/hist.rb +27 -0
- data/examples/iris.rb +29 -0
- data/examples/map.rb +23 -0
- data/examples/match.rb +21 -0
- data/examples/test.xlsx +0 -0
- data/examples/test1.rb +44 -0
- data/examples/test2.rb +14 -0
- data/examples/test3.db +0 -0
- data/examples/test3.rb +11 -0
- data/examples/test3.xlsx +0 -0
- data/examples/to_excel.rb +27 -0
- data/lib/R.rb +365 -0
- data/lib/carray/autoload/autoload_dataframe_dataframe.rb +26 -0
- data/lib/carray/dataframe/dataframe.rb +1640 -0
- metadata +106 -0
data/examples/test2.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "carray"
|
3
|
+
require_relative "../lib/carray/dataframe/dataframe"
|
4
|
+
|
5
|
+
|
6
|
+
# Example: build a data frame whose column names are Japanese strings and
# read the columns back through accessor methods named after them.
columns = {
  '温度' => [4,5,6,7],
  '湿度' => [10,20,30,40],
  '気圧差' => [100,50,-30,-50],
}

# The block is handed to CADataFrame.new; `double` is resolved by the
# library (presumably it declares these two columns as double -- confirm
# against dataframe.rb).
df = CADataFrame.new(columns) do
  double :温度, :湿度
end

p df

p df.温度
p df.湿度
|
data/examples/test3.db
ADDED
Binary file
|
data/examples/test3.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require "carray"
|
2
|
+
require_relative "../lib/carray/dataframe/dataframe"
|
3
|
+
|
4
|
+
# Example: mask some rows, then export the frame to Excel and SQLite.
columns = {
  '番号' => [4,5,6,7],
  'bbb' => [10,20,30,40],
  'ccc' => [100,50,-30,-50],
}
df = CADataFrame.new(columns)
p df

# Assign UNDEF to everything selected where 番号 is odd.
odd_rows = df.select { (番号 % 2).eq(1) }
odd_rows[] = UNDEF

df.to_xlsx("test3.xlsx", with_row_index: true)

# unmask(-999) is applied before writing the "test" table to SQLite.
df.to_df.unmask(-999).to_sqlite3(database: "test3.db", table: "test")
|
11
|
+
|
data/examples/test3.xlsx
ADDED
Binary file
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require "carray"
|
2
|
+
|
3
|
+
# Example: derive two columns from :a, then write the frame to an Excel
# workbook and attach a scatter chart to the generated sheet.
df = CADataFrame.new(:a => (1..10).to_a)

df.arrange do
  append :b, a**2
  append :c, a.template(:object).seq("A", :succ)
end

# The block receives the worksheet; the cell ranges skip row 1, which
# presumably holds the column headers.
df.to_xlsx("test.xlsx") do |sheet|
  sheet.add_chart(Axlsx::ScatterChart, :start_at => [5,2], :end_at => [10, 15], :title => "Chart") do |chart|
    chart.add_series(:xData => sheet["A2:A11"],
                     :yData => sheet["B2:B11"],
                     :labels => sheet["C2:C11"],
                     :title => 'bob')
  end
end
|
17
|
+
|
18
|
+
__END__
|
19
|
+
xl = Axlsx::Package.new
|
20
|
+
xl.use_shared_strings = true
|
21
|
+
sheet = xl.workbook.add_worksheet(name: 'Example')
|
22
|
+
sheet.add_row(df.column_names)
|
23
|
+
df.each_row do |list|
|
24
|
+
sheet.add_row(list)
|
25
|
+
end
|
26
|
+
|
27
|
+
endxl.serialize("test.xlsx")
|
data/lib/R.rb
ADDED
@@ -0,0 +1,365 @@
|
|
1
|
+
require "rsruby"
|
2
|
+
require "rsruby/erobj"
|
3
|
+
require "carray"
|
4
|
+
|
5
|
+
class RSRuby

  # Translates a 'Ruby-style' R function name into the real R name.
  #
  # * A single trailing underscore is dropped ("print_" -> "print").
  #   A trailing double underscore is kept so the next rule can apply.
  # * Every "__" becomes "<-" ("names__" -> "names<-").
  # * Every remaining "_" becomes "." ("as_data_frame" -> "as.data.frame").
  #
  # Returns a new String; +name+ itself is not modified.
  def RSRuby.convert_method_name(name)
    drop_last = name.length > 1 && name[-1].chr == '_' && name[-2].chr != '_'
    r_name = drop_last ? name[0..-2] : name
    r_name.gsub(/__/, '<-').gsub(/_/, '.')
  end

end
|
19
|
+
|
20
|
+
# Entry point for evaluating R code through the R module.
#
# expr  :: R source text, handed to R.call together with +hash+.
# hash  :: name => value pairs passed on to R.call.
# block :: alternatively, a block that is instance_exec'ed on the R module.
#
# Returns whatever R.call or the block returns.
# Raises RuntimeError when both an expression and a block are given.
def R(expr = nil, hash = {}, &block)
  return R.call(expr, hash) unless block
  raise "don't give both of block and expression" if expr
  R.instance_exec(&block)
end
|
31
|
+
|
32
|
+
# Statement-style counterpart of R().
#
# * With a block: the block is instance_exec'ed on the R module.
# * With a Hash as +expr+: every pair is assigned as an R variable. Values
#   go through R.__converter__ first, as R.exec and the `R.name = value`
#   form do, so the same Ruby objects are accepted on every path.
# * Otherwise: +expr+ and +hash+ are handed to R.exec.
#
# Returns the block value, the Hash that was given, or the result of
# R.exec respectively.
# Raises RuntimeError when both an expression and a block are given.
def R!(expr = nil, hash = {}, &block)
  if block
    raise "don't give both of block and expression" if expr
    R.instance_exec(&block)
  elsif expr.is_a?(Hash)
    expr.each do |name, value|
      R.instance.assign(name.to_s, R.__converter__(value))
    end
  else
    R.exec(expr, hash)
  end
end
|
47
|
+
|
48
|
+
module R
|
49
|
+
|
50
|
+
class Receiver < ::ERObj

  # klass :: the R class name recorded for the wrapped object
  # x     :: the R object to wrap; its R attributes are fetched once here
  def initialize(klass, x)
    @classname  = klass
    @attributes = R(%{ attributes(obj) }, :obj => x)
    super(x)
  end

  attr_reader :attributes, :robj

  # With a name: looks the component up with R's `$` operator (underscores
  # in the name are turned into dots first) and falls back to the stored
  # attributes when R raises. Without a name: converts the whole object.
  def [](sym = nil)
    return __converter__(to_ruby) unless sym
    r_name = sym.to_s.gsub(/_/, '.')
    found =
      begin
        @r['$'].call(@robj, r_name)
      rescue RException
        @attributes[r_name]
      end
    __converter__(found)
  end

  # Recursively converts a value: Arrays go through to_ca and are mapped in
  # place, Hash values are converted into a new Hash, RObj instances are
  # passed to R::CONVERTER, anything else is returned unchanged.
  def __converter__(arg)
    case arg
    when Array
      arg.to_ca.map! { |item| __converter__(item) }
    when Hash
      arg.each_with_object({}) { |(key, item), acc| acc[key] = __converter__(item) }
    when RObj
      R::CONVERTER[arg]
    else
      arg
    end
  end

  # Explicit accessor for the component named "method" (this replaces the
  # inherited Object#method on Receiver instances).
  def method
    self[:method]
  end

  def inspect
    [
      "<R:Receiver: class=#{@classname} ",
      " attributes=#{@attributes.inspect} ",
      " data=#{to_ruby} >",
    ].join("\n")
  end

  # Argument-less unknown methods are treated as component lookups.
  def method_missing(sym, *args)
    return self[sym] if args.empty?
    super
  end

  def to_ary
    [self.to_s]
  end

end
|
115
|
+
|
116
|
+
# Maps an R class name to the lambda that wraps an R object of that class.
# CONVERTER consults this table before its generic conversion rules.
CONVERSION_TABLE = {
  "data.frame" => ->(x) { CADataFrame.from_R_data_frame(x) },
  "ts"         => ->(x) { R::TimeSeries.new(x) },
}
|
120
|
+
|
121
|
+
# Converts a value coming back from R.
#
# * Anything that is not an RObj is returned unchanged.
# * If the R class name has an entry in CONVERSION_TABLE, that entry decides.
# * Otherwise the result of x.to_ruby is inspected:
#   - Numeric / String are returned as they are,
#   - Hash / nil are wrapped in a Receiver,
#   - Array becomes a CArray; for "character"/"factor", "integer" and
#     "numeric" the matching R NA value is masked out (and the array is
#     cast with int32 / double for the latter two),
#   - any other value is returned as it is.
#
# @r is read from the object that is `self` where this lambda is defined.
CONVERTER = lambda { |x|
  next x unless RObj === x

  klass = @r.eval_R("class").call(x)
  next CONVERSION_TABLE[klass][x] if CONVERSION_TABLE.has_key?(klass)

  val = x.to_ruby
  case val
  when Numeric, String
    val
  when Hash, NilClass
    Receiver.new(klass, x)
  when Array
    ca = val.to_ca
    case klass
    when "character", "factor" then ca.maskout!(R.NA_character_)
    when "integer"             then ca.maskout!(R.NA_integer_).int32
    when "numeric"             then ca.maskout!(R.NA_real_).double
    else ca
    end
  else
    val
  end
}
|
152
|
+
|
153
|
+
# Starts the embedded R interpreter once and configures conversion.
#
# Does nothing when an interpreter is already held in @r. Otherwise it
# forces an en_US.UTF-8 locale through ENV, stores RSRuby.instance,
# switches RSRuby to PROC_CONVERSION, makes data.frame and matrix results
# come back as ERObj, registers CONVERTER as a match-everything proc
# conversion, caches R's three NA values, and installs a finalizer that
# shuts R down.
#
# Always returns nil.
def self.run
  return nil if @r

  ENV["LANG"] = "en_US.UTF-8"
  ENV["LC_ALL"] = "en_US.UTF-8"
  @r = RSRuby.instance
  RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
  @r.class_table['data.frame'] = lambda { |x| ERObj.new(x) }
  @r.class_table['matrix'] = lambda { |x| ERObj.new(x) }
  @r.proc_table[lambda { |x| true }] = CONVERTER
  @NA_integer_ = R(%{ NA_integer_ })
  @NA_real_ = R(%{ NA_real_ })
  @NA_character_ = R(%{ NA_character_ })
  # @r is reset to nil by `stop`; guard so the finalizer does not call
  # shutdown on nil after an explicit stop.
  ObjectSpace.define_finalizer(self, proc { @r.shutdown if @r })
  nil
end
|
170
|
+
|
171
|
+
# Shuts the R interpreter down and forgets it, so that `run` can start a
# fresh one. Safe to call when R was never started or is already stopped.
#
# Returns nil.
def self.stop
  @r.shutdown if @r
  @r = nil
end
|
175
|
+
|
176
|
+
# Singleton readers for the NA values that `run` stores in
# @NA_integer_, @NA_real_ and @NA_character_.
class << self
  attr_reader :NA_integer_
  attr_reader :NA_real_
  attr_reader :NA_character_
end
|
179
|
+
|
180
|
+
# The interpreter object stored by `run` (nil until `run` has been called
# and again after `stop`).
def self.instance
  @r
end
|
183
|
+
|
184
|
+
# Assigns every name => value pair of +hash+ as an R variable (values are
# passed through __converter__ first), then evaluates +expr+ with eval_R
# and returns its result.
def self.exec(expr, hash = {})
  hash.each { |name, value| @r.assign(name.to_s, __converter__(value)) }
  @r.eval_R(expr)
end
|
190
|
+
|
191
|
+
# Wraps +expr+ in an anonymous R function whose parameters are the keys of
# +hash+, evaluates that function definition and calls it with the
# converted values.
#
# A placeholder parameter "DU33Y" (argument 0) always comes first, so the
# parameter list is never empty.
def self.call(expr, hash = {})
  pairs = hash.map { |name, value| [name.to_s, __converter__(value)] }
  names = ["DU33Y"] + pairs.map(&:first)
  args = [0] + pairs.map(&:last)
  source = "function (#{names.join(",")}) {" + expr + "}"
  @r.eval_R(source).call(*args)
end
|
201
|
+
|
202
|
+
# Prepares a Ruby value for being handed to R.
#
# * Symbol      -> its String form
# * CArray      -> the converted result of its as_r
# * CADataFrame -> its as_r
# * Array       -> a new Array with every element converted
# * Hash        -> a new Hash with the same keys and converted values
# * anything else is returned unchanged
def self.__converter__(arg)
  case arg
  when Symbol      then arg.to_s
  when CArray      then __converter__(arg.as_r)
  when CADataFrame then arg.as_r
  when Array       then arg.map { |item| __converter__(item) }
  when Hash
    arg.each_with_object({}) { |(key, item), acc| acc[key] = __converter__(item) }
  else
    arg
  end
end
|
222
|
+
|
223
|
+
# Forwards unknown calls to the R interpreter held in @r.
#
# * `name!` with no arguments: fetches `name` from @r and calls the result
#   with no arguments.
# * `name = value` (one argument): assigns the converted value to `name`.
# * anything else: sends the call to @r with every argument converted.
def self.method_missing(sym, *args)
  name = sym.to_s
  if args.empty? && name[-1] == "!"
    @r.send(name[0..-2].intern).call()
  elsif args.size == 1 && name[-1] == "="
    @r.assign(name[0..-2], __converter__(args[0]))
  else
    @r.send(sym, *args.map { |v| __converter__(v) })
  end
end
|
232
|
+
|
233
|
+
end
|
234
|
+
|
235
|
+
class CArray

  # Guesses which R vector type suits this array.
  #
  # Returns "integer" or "numeric" for integer / float arrays. For object
  # arrays the unmasked elements are inspected: all Integer -> "integer",
  # all Numeric -> "numeric", all String -> "character", otherwise
  # "unknown". Returns nil for any other array type.
  def guess_column_type_for_R
    if integer?
      "integer"
    elsif float?
      "numeric"
    elsif object?
      unmasked = self[:is_not_masked].to_ca
      # True when every unmasked element is an instance of +klass+.
      every = lambda { |klass|
        unmasked.convert(:boolean) { |v| v.is_a?(klass) }.all_equal?(1)
      }
      if every[Integer]
        "integer"
      elsif every[Numeric]
        "numeric"
      elsif every[String]
        "character"
      else
        "unknown"
      end
    end
  end

  # Converts this array into something RSRuby can pass to R.
  #
  # Masked elements are first replaced by the R NA value that matches
  # guess_column_type_for_R (character NA when the type is neither
  # "integer" nor "numeric"). Rank-2 arrays are turned into an R matrix
  # with conversion switched off, so an R object is returned; every other
  # rank is returned as a plain Ruby Array via to_a.
  def as_r
    out =
      if has_mask?
        fill = case guess_column_type_for_R
               when "integer" then R.NA_integer_
               when "numeric" then R.NA_real_
               else R.NA_character_
               end
        unmask_copy(fill)
      else
        self
      end

    return out.to_a unless rank == 2

    # Read the current mode before entering the protected region so that
    # the ensure clause always restores a real mode value.
    mode = RSRuby.get_default_mode
    begin
      RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
      # NOTE(review): R's matrix() fills column by column unless
      # byrow = TRUE; if `flatten` yields row-major order the element
      # layout differs from this array's -- confirm this is intended.
      R.matrix(out.flatten.to_a, :nrow => dim0)
    ensure
      RSRuby.set_default_mode(mode)
    end
  end

end
|
289
|
+
|
290
|
+
class CADataFrame

  # Builds a CADataFrame from an R data.frame object.
  #
  # Switches RSRuby to PROC_CONVERSION (the mode is not restored
  # afterwards) and makes sure R::CONVERTER is registered as a
  # match-everything proc conversion. Each column is fetched with R's `$`
  # operator, turned into a CArray when it is not one already, and nil
  # entries are masked out. The R row.names attribute, when present,
  # becomes the row index.
  def self.from_R_data_frame(obj)
    r = R.instance
    RSRuby.set_default_mode(RSRuby::PROC_CONVERSION)
    # Register the converter only once; adding a fresh lambda key on every
    # call would make proc_table grow with each converted data frame.
    unless r.proc_table.has_value?(R::CONVERTER)
      r.proc_table[lambda { |x| true }] = R::CONVERTER
    end

    # colnames may come back as a bare String (presumably for a
    # single-column frame), which has no to_a; wrap it instead of failing.
    names = r.colnames(obj)
    names = names.to_a if names.respond_to?(:to_a)
    column_names = [names].flatten
    row_names = r.attr(obj, 'row.names')

    columns = {}
    column_names.each do |name|
      value = r['$'].call(obj, name.to_s)
      columns[name] = case value
                      when CArray then value
                      when Array  then value.to_ca
                      else [value].to_ca
                      end
    end
    column_names.each { |name| columns[name].maskout!(nil) }

    CADataFrame.new(columns, row_index: row_names ? row_names.to_ca : nil)
  end

  # Converts this data frame into an R data.frame object.
  #
  # Masked elements of each column are replaced by the R NA value that
  # matches the column's guessed R type (character NA when the type is
  # neither "integer" nor "numeric"). The call to as.data.frame runs with
  # conversion switched off so the R object itself is returned; the
  # previous conversion mode is restored afterwards.
  def as_r
    r = R.instance
    new_columns = {}
    @column_names.each do |name|
      column = @columns[name]
      if column.has_mask?
        fill = case column.guess_column_type_for_R
               when "integer" then R.NA_integer_
               when "numeric" then R.NA_real_
               else R.NA_character_
               end
        column = column.unmask_copy(fill)
      end
      new_columns[name] = R.__converter__(column.to_a)
    end

    # Only the R call is protected: a failure while preparing the columns
    # above must not reach an ensure clause that would "restore" a mode
    # that was never read.
    mode = RSRuby.get_default_mode
    begin
      RSRuby.set_default_mode(RSRuby::NO_CONVERSION)
      r.as_data_frame(:x => new_columns)
    ensure
      RSRuby.set_default_mode(mode)
    end
  end

end
|
344
|
+
|
345
|
+
# Wrapper used for R objects of class "ts" (see R::CONVERSION_TABLE).
# Each method forwards to the R function of the same name, passing this
# object as the only argument.
class R::TimeSeries < ERObj

  # Result of R's start() for this series.
  def start
    R.start(self)
  end

  # Result of R's end() for this series.
  def end
    R.end(self)
  end

  # Result of R's frequency() for this series.
  def frequency
    R.frequency(self)
  end

  # Result of R's length() for this series.
  def length
    R.length(self)
  end

end
|
364
|
+
|
365
|
+
|
@@ -0,0 +1,26 @@
|
|
1
|
+
|
2
|
+
# Lazy-loading declarations for the data frame library. Every entry points
# at "carray/dataframe/dataframe".
#
# NOTE(review): `autoload_method` is not defined in this file; presumably
# carray provides it and it loads the named file the first time the method
# is called -- confirm against carray.
module CA::TableMethods
  autoload_method "to_df", "carray/dataframe/dataframe"
end

# Constants resolved on first reference through Ruby's own autoload.
autoload :CADataFrame, "carray/dataframe/dataframe"

autoload :DataFrame, "carray/dataframe/dataframe"
autoload :RSReceiver, "carray/dataframe/dataframe"

# NOTE(review): "recieve" is spelled as in the original; it has to match
# the method name defined in the target file -- verify before changing.
class RSRuby
  autoload_method "setup", "carray/dataframe/dataframe"
  autoload_method "recieve", "carray/dataframe/dataframe"
end

# The Daru and SQLite3 namespaces are (re)opened here only to attach a
# lazily loaded `to_df` to their classes.
module Daru
  class DataFrame
    autoload_method "to_df", "carray/dataframe/dataframe"
  end
end

module SQLite3
  class Database
    autoload_method "to_df", "carray/dataframe/dataframe"
  end
end
|