carray 1.1.4 → 1.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/COPYING +56 -0
- data/GPL +340 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +33 -0
- data/LEGAL +50 -0
- data/NOTE +73 -0
- data/Rakefile +20 -0
- data/TODO +5 -0
- data/ca_iter_block.c +242 -0
- data/ca_iter_dimension.c +287 -0
- data/ca_iter_window.c +202 -0
- data/ca_obj_array.c +1189 -0
- data/ca_obj_bitarray.c +523 -0
- data/ca_obj_bitfield.c +636 -0
- data/ca_obj_block.c +885 -0
- data/ca_obj_fake.c +405 -0
- data/ca_obj_farray.c +482 -0
- data/ca_obj_field.c +625 -0
- data/ca_obj_grid.c +738 -0
- data/ca_obj_mapping.c +614 -0
- data/ca_obj_object.c +777 -0
- data/ca_obj_reduce.c +299 -0
- data/ca_obj_refer.c +627 -0
- data/ca_obj_repeat.c +640 -0
- data/ca_obj_select.c +558 -0
- data/ca_obj_shift.c +952 -0
- data/ca_obj_transpose.c +582 -0
- data/ca_obj_unbound_repeat.c +557 -0
- data/ca_obj_window.c +1023 -0
- data/carray.h +1381 -0
- data/carray_access.c +1798 -0
- data/carray_attribute.c +903 -0
- data/carray_call_cfunc.c +1107 -0
- data/carray_cast.c +1155 -0
- data/carray_cast_func.rb +498 -0
- data/carray_class.c +132 -0
- data/carray_conversion.c +518 -0
- data/carray_copy.c +453 -0
- data/carray_core.c +1307 -0
- data/carray_element.c +572 -0
- data/carray_generate.c +681 -0
- data/carray_iterator.c +630 -0
- data/carray_loop.c +462 -0
- data/carray_mask.c +1174 -0
- data/carray_math.rb +834 -0
- data/carray_numeric.c +257 -0
- data/carray_operator.c +582 -0
- data/carray_order.c +1040 -0
- data/carray_random.c +529 -0
- data/carray_sort_addr.c +261 -0
- data/carray_stat.c +2102 -0
- data/carray_stat_proc.rb +1990 -0
- data/carray_test.c +602 -0
- data/carray_undef.c +69 -0
- data/carray_utils.c +740 -0
- data/ext/calculus/carray_calculus.c +792 -0
- data/ext/calculus/carray_interp.c +355 -0
- data/ext/calculus/extconf.rb +12 -0
- data/ext/calculus/lib/autoload/autoload_math_calculus.rb +2 -0
- data/ext/calculus/lib/math/calculus.rb +119 -0
- data/ext/calculus/lib/math/interp/adapter_interp1d.rb +31 -0
- data/ext/dataframe/API.txt +11 -0
- data/ext/dataframe/extconf.rb +3 -0
- data/ext/dataframe/lib/carray/autoload/autoload_dataframe_dataframe.rb +14 -0
- data/ext/dataframe/lib/carray/dataframe/dataframe.rb +1104 -0
- data/ext/dataframe/sample/test_uniq_sort.rb +5 -0
- data/ext/fortio/extconf.rb +3 -0
- data/ext/fortio/lib/carray/autoload/autoload_fortran_format.rb +5 -0
- data/ext/fortio/lib/carray/io/fortran_format.rb +43 -0
- data/ext/fortio/lib/fortio.rb +3 -0
- data/ext/fortio/lib/fortio/fortran_format.rb +603 -0
- data/ext/fortio/lib/fortio/fortran_format.tab.rb +536 -0
- data/ext/fortio/lib/fortio/fortran_format.y +215 -0
- data/ext/fortio/lib/fortio/fortran_namelist.rb +151 -0
- data/ext/fortio/lib/fortio/fortran_namelist.tab.rb +470 -0
- data/ext/fortio/lib/fortio/fortran_namelist.y +213 -0
- data/ext/fortio/lib/fortio/fortran_sequential.rb +345 -0
- data/ext/fortio/ruby_fortio.c +182 -0
- data/ext/fortio/test/test_H.rb +5 -0
- data/ext/fortio/test/test_T.rb +7 -0
- data/ext/fortio/test/test_fortran_format.rb +86 -0
- data/ext/fortio/test/test_namelist.rb +25 -0
- data/ext/fortio/test/test_sequential.rb +13 -0
- data/ext/fortio/test/test_sequential2.rb +13 -0
- data/ext/fortio/work/test.rb +10 -0
- data/ext/fortio/work/test_e.rb +19 -0
- data/ext/fortio/work/test_ep.rb +10 -0
- data/ext/fortio/work/test_parse.rb +12 -0
- data/ext/imagemap/carray_imagemap.c +495 -0
- data/ext/imagemap/doc/call_graph.dot +28 -0
- data/ext/imagemap/draw.c +567 -0
- data/ext/imagemap/extconf.rb +13 -0
- data/ext/imagemap/lib/autoload/autoload_graphics_imagemap.rb +1 -0
- data/ext/imagemap/lib/graphics/imagemap.rb +273 -0
- data/ext/imagemap/lib/image_map.rb +4 -0
- data/ext/imagemap/test/swath_index.rb +83 -0
- data/ext/imagemap/test/swath_warp.rb +99 -0
- data/ext/imagemap/test/test.rb +23 -0
- data/ext/imagemap/test/test_image.rb +42 -0
- data/ext/imagemap/test/test_line.rb +14 -0
- data/ext/imagemap/test/test_rotate.rb +17 -0
- data/ext/imagemap/test/test_triangle.rb +20 -0
- data/ext/imagemap/test/test_warp.rb +26 -0
- data/ext/mathfunc/carray_mathfunc.c +321 -0
- data/ext/mathfunc/extconf.rb +18 -0
- data/ext/mathfunc/lib/autoload/autoload_math_mathfunc.rb +1 -0
- data/ext/mathfunc/lib/math/mathfunc.rb +15 -0
- data/ext/mathfunc/test/test_hypot.rb +5 -0
- data/ext/mathfunc/test/test_j0.rb +22 -0
- data/ext/mathfunc/test/test_jn.rb +8 -0
- data/ext/mathfunc/test/test_sph.rb +9 -0
- data/ext/narray/README +22 -0
- data/ext/narray/ca_wrap_narray.c +491 -0
- data/ext/narray/carray_narray.c +21 -0
- data/ext/narray/extconf.rb +57 -0
- data/ext/narray/lib/autoload/autoload_math_narray.rb +1 -0
- data/ext/narray/lib/autoload/autoload_math_narray_miss.rb +11 -0
- data/ext/narray/lib/math/narray.rb +17 -0
- data/ext/narray/lib/math/narray_miss.rb +45 -0
- data/extconf.rb +3 -25
- data/lib/carray.rb +28 -0
- data/lib/carray/autoload/autoload_base.rb +23 -0
- data/lib/carray/autoload/autoload_graphics_gnuplot.rb +2 -0
- data/lib/carray/autoload/autoload_io_csv.rb +14 -0
- data/lib/carray/autoload/autoload_io_excel.rb +5 -0
- data/lib/carray/autoload/autoload_io_imagemagick.rb +6 -0
- data/lib/carray/autoload/autoload_io_pg.rb +6 -0
- data/lib/carray/autoload/autoload_io_sqlite3.rb +12 -0
- data/lib/carray/autoload/autoload_io_table.rb +1 -0
- data/lib/carray/autoload/autoload_math_histogram.rb +5 -0
- data/lib/carray/autoload/autoload_math_interp.rb +4 -0
- data/lib/carray/autoload/autoload_math_recurrence.rb +6 -0
- data/lib/carray/autoload/autoload_object_iterator.rb +1 -0
- data/lib/carray/autoload/autoload_object_link.rb +1 -0
- data/lib/carray/autoload/autoload_object_pack.rb +2 -0
- data/lib/carray/base/autoload.rb +94 -0
- data/lib/carray/base/basic.rb +1051 -0
- data/lib/carray/base/inspect.rb +252 -0
- data/lib/carray/base/iterator.rb +367 -0
- data/lib/carray/base/math.rb +403 -0
- data/lib/carray/base/obsolete.rb +93 -0
- data/lib/carray/base/serialize.rb +260 -0
- data/lib/carray/base/struct.rb +634 -0
- data/lib/carray/graphics/gnuplot.rb +2116 -0
- data/lib/carray/info.rb +112 -0
- data/lib/carray/io/csv.rb +560 -0
- data/lib/carray/io/excel.rb +26 -0
- data/lib/carray/io/imagemagick.rb +231 -0
- data/lib/carray/io/pg.rb +101 -0
- data/lib/carray/io/sqlite3.rb +202 -0
- data/lib/carray/io/table.rb +77 -0
- data/lib/carray/math/histogram.rb +179 -0
- data/lib/carray/math/interp.rb +57 -0
- data/lib/carray/math/interp/adapter_gsl_spline.rb +47 -0
- data/lib/carray/math/recurrence.rb +95 -0
- data/lib/carray/mkmf.rb +145 -0
- data/lib/carray/object/ca_obj_iterator.rb +52 -0
- data/lib/carray/object/ca_obj_link.rb +52 -0
- data/lib/carray/object/ca_obj_pack.rb +101 -0
- data/mkmath.rb +731 -0
- data/mt19937ar.c +182 -0
- data/mt19937ar.h +86 -0
- data/rdoc_main.rb +27 -0
- data/rdoc_math.rb +5 -0
- data/rdoc_stat.rb +31 -0
- data/ruby_carray.c +242 -0
- data/ruby_ccomplex.c +497 -0
- data/ruby_float_func.c +83 -0
- data/spec/CABlockIterator/CABlockIterator_spec.rb +113 -0
- data/spec/CArray/bug/store_spec.rb +27 -0
- data/spec/CArray/index/repeat_spec.rb +10 -0
- data/spec/CArray/method/eq_spec.rb +80 -0
- data/spec/CArray/method/is_nan_spec.rb +12 -0
- data/spec/CArray/method/ne_spec.rb +18 -0
- data/spec/CArray/method/round_spec.rb +11 -0
- data/spec/CArray/object/_attribute_spec.rb +32 -0
- data/spec/CArray/object/s_new_spec.rb +31 -0
- data/spec/CArray/serialize/Serialization_spec.rb +89 -0
- data/spec/spec_all.rb +11 -0
- data/test/test_ALL.rb +50 -0
- data/test/test_CABitfield.rb +59 -0
- data/test/test_CABlock.rb +208 -0
- data/test/test_CAField.rb +40 -0
- data/test/test_CAGrid.rb +76 -0
- data/test/test_CAMapping.rb +106 -0
- data/test/test_CAMmap.rb +11 -0
- data/test/test_CARefer.rb +94 -0
- data/test/test_CARepeat.rb +66 -0
- data/test/test_CASelect.rb +23 -0
- data/test/test_CAShift.rb +17 -0
- data/test/test_CATranspose.rb +61 -0
- data/test/test_CAVirtual.rb +214 -0
- data/test/test_CAWindow.rb +55 -0
- data/test/test_CAWrap.rb +9 -0
- data/test/test_CArray.rb +228 -0
- data/test/test_CComplex.rb +83 -0
- data/test/test_CScalar.rb +91 -0
- data/test/test_attribute.rb +281 -0
- data/test/test_block_iterator.rb +17 -0
- data/test/test_boolean.rb +99 -0
- data/test/test_cast.rb +33 -0
- data/test/test_class.rb +85 -0
- data/test/test_complex.rb +43 -0
- data/test/test_composite.rb +125 -0
- data/test/test_convert.rb +79 -0
- data/test/test_copy.rb +141 -0
- data/test/test_creation.rb +85 -0
- data/test/test_element.rb +146 -0
- data/test/test_extream.rb +55 -0
- data/test/test_generate.rb +75 -0
- data/test/test_index.rb +71 -0
- data/test/test_mask.rb +578 -0
- data/test/test_math.rb +98 -0
- data/test/test_narray.rb +64 -0
- data/test/test_order.rb +147 -0
- data/test/test_random.rb +15 -0
- data/test/test_ref_store.rb +211 -0
- data/test/test_stat.rb +414 -0
- data/test/test_struct.rb +72 -0
- data/test/test_virtual.rb +49 -0
- data/utils/ca_ase.rb +21 -0
- data/utils/ca_methods.rb +15 -0
- data/utils/cast_checker.rb +30 -0
- data/utils/create_rdoc.sh +9 -0
- data/utils/diff_method.rb +52 -0
- data/utils/extract_rdoc.rb +27 -0
- data/utils/make_tgz.sh +3 -0
- data/utils/remove_resource_fork.sh +5 -0
- data/version.h +3 -3
- metadata +266 -1
@@ -0,0 +1,31 @@
|
|
1
|
+
# ----------------------------------------------------------------------------
|
2
|
+
#
|
3
|
+
# carray/math/interp/adapter_interp1d.rb
|
4
|
+
#
|
5
|
+
# This file is part of Ruby/CArray extension library.
|
6
|
+
# You can redistribute it and/or modify it under the terms of
|
7
|
+
# the Ruby Licence.
|
8
|
+
#
|
9
|
+
# Copyright (C) 2005 Hiroki Motoyoshi
|
10
|
+
#
|
11
|
+
# ----------------------------------------------------------------------------
|
12
|
+
|
13
|
+
require "carray/math/interp"
|
14
|
+
|
15
|
+
# Interpolation adapter registered under the name "interp1d".
# It keeps the scale and value arrays and hands the actual work to
# the value array's own #interpolate method.
class CA::Interp::CAInterp1D < CA::Interp::Adapter

  install_adapter "interp1d"

  # scales  - abscissa passed as the first argument of #interpolate
  # value   - object that receives the #interpolate call
  # options - accepted for interface compatibility; not used here
  def initialize(scales, value, options = {})
    @x = scales
    @y = value
  end

  # Interpolates the stored values at x0.
  def evaluate(x0)
    @y.interpolate(@x, x0)
  end

  alias grid evaluate

end
|
31
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
CADataFrame
|
2
|
+
|
3
|
+
#append(name) { INSTANCE_CONTEXT } <- any carray
|
4
|
+
#lead(name) { INSTANCE_CONTEXT } <- any carray
|
5
|
+
#execute { INSTANCE_CONTEXT } => any object
|
6
|
+
#select(name...) { INSTANCE_CONTEXT } <- boolean carray
|
7
|
+
#reorder { INSTANCE_CONTEXT } <- int32 carray (addresses for mapping)
|
8
|
+
#order_by { INSTANCE_CONTEXT } <- Array of int32 carray or carray (addresses for mapping)
|
9
|
+
|
10
|
+
#calculate {|label, column| CALLER_CONTEXT } <- scalar
|
11
|
+
#resample {|label, column| CALLER_CONTEXT } <- any carray
|
@@ -0,0 +1,14 @@
|
|
1
|
+
|
2
|
+
# Lazy-loading declarations for the dataframe extension. Every entry
# points at the same feature file, "carray/dataframe/dataframe".
# NOTE(review): autoload_method is not defined in this file; presumably it
# registers a stub that requires the file on first call -- confirm.
module CA::TableMethods
  autoload_method "to_dataframe", "carray/dataframe/dataframe"
end

autoload :CADataFrame, "carray/dataframe/dataframe"

autoload :DataFrame, "carray/dataframe/dataframe"
autoload :RSReceiver, "carray/dataframe/dataframe"

class RSRuby
  autoload_method "setup", "carray/dataframe/dataframe"
  # NOTE(review): "recieve" looks like a misspelling of "receive"; it must
  # match the method name defined in the target file -- verify before renaming.
  autoload_method "recieve", "carray/dataframe/dataframe"
end
|
@@ -0,0 +1,1104 @@
|
|
1
|
+
require "carray"
|
2
|
+
require "carray/io/table"
|
3
|
+
|
4
|
+
module CA::TableMethods

  # Wraps the receiver in a CADataFrame; an optional block is forwarded
  # to CADataFrame.new.
  def to_dataframe(&block)
    CADataFrame.new(self, &block)
  end

end
|
11
|
+
|
12
|
+
class CADataFrame
|
13
|
+
|
14
|
+
# Builds a data frame either from a Hash of columns or from a CArray table.
#
# columns_or_table - Hash (name => column) or CArray table
# row_index        - optional row labels, converted with to_ca.object
# column_names     - optional names, used only for the CArray form
# block            - optional block handed to #arrange once setup is done
#
# Raises RuntimeError when column sizes differ or the data kind is unknown.
def initialize(columns_or_table, row_index = nil, column_names = nil, &block)
  case columns_or_table
  when Hash
    @column_names  = columns_or_table.keys.map { |key| key.to_s }
    @columns       = normalize_columns(columns_or_table)
    @column_number = @column_names.size
    @row_number    = @columns.first[1].size
    unless @column_names.all? { |key| @columns[key].size == @row_number }
      raise "column sizes mismatch"
    end
  when CArray
    names = column_names || columns_or_table.column_names
    @column_names  = names.map { |name| name.to_s }
    @columns       = table_to_columns(columns_or_table)
    @column_number = @column_names.size
    @row_number    = columns_or_table.dim0
  else
    raise "unknown data"
  end
  @row_index   = row_index ? row_index.to_ca.object : nil
  @__methods__ = {}
  arrange(&block) if block_given?
end
|
48
|
+
|
49
|
+
# Exposes the alias table (alias name => column name) filled by #method.
def __methods__
  @__methods__
end
|
52
|
+
|
53
|
+
private
|
54
|
+
|
55
|
+
# Splits a table into a Hash keyed by column name; the i-th name in
# @column_names is paired with table[nil, i].
def table_to_columns(table)
  @column_names.each_with_index.each_with_object({}) do |(name, position), result|
    result[name] = table[nil, position]
  end
end
|
62
|
+
|
63
|
+
# Returns a new Hash with string keys whose values have been turned into
# column arrays:
#   * CArray values are kept as they are,
#   * Array values go through to_ca; when the result is not rank 1 the
#     elements are stored one by one in an object array instead,
#   * anything else is converted with to_ca.
def normalize_columns(columns)
  columns.each_key.each_with_object({}) do |key, normalized|
    source = columns[key]
    normalized[key.to_s] =
      case source
      when CArray
        source
      when Array
        converted = source.to_ca
        if converted.rank != 1
          queue = source.clone
          converted = CArray.object(queue.size).convert { queue.shift }
        end
        converted
      else
        source.to_ca
      end
  end
end
|
82
|
+
|
83
|
+
public
|
84
|
+
|
85
|
+
attr_reader :columns, :column_names, :row_index, :column_number, :row_number
|
86
|
+
|
87
|
+
# Returns the data type name of every column, in @column_names order.
# (The previous version read the misspelled, never-assigned
# @columns_names and therefore always failed on nil.)
def column_types
  @column_names.map { |name| @columns[name].data_type_name }
end
|
90
|
+
|
91
|
+
# Iterates over the [name, column] pairs of the column Hash and returns
# whatever Hash#each returns.
def each_column(&block)
  @columns.each(&block)
end
|
94
|
+
|
95
|
+
# Yields one Array of cell values per row and returns self.
#
# with_row_index - when true and a row index exists, the row label is
#                  prepended to the yielded values.
#
# Cells are emitted in @column_names order. The previous version walked
# the @columns Hash, whose insertion order drifts from @column_names after
# a rename or a leading insert, so cells no longer matched their headers.
def each_row(with_row_index: false, &block)
  labelled = with_row_index && @row_index
  @row_number.times do |i|
    cells = labelled ? [@row_index[i]] : []
    @column_names.each { |name| cells << @columns[name][i] }
    yield cells
  end
  self
end
|
107
|
+
|
108
|
+
# Registers column aliases. Keys and values of +hash+ are stringified and
# merged into the alias table; the updated table is returned.
def method(hash)
  stringified = hash.each_with_object({}) do |(key, value), acc|
    acc[key.to_s] = value.to_s
  end
  @__methods__.update(stringified)
end
|
115
|
+
|
116
|
+
# Looks a column up by position (Integer) or by name (String/Symbol).
# Any other argument type yields nil.
def col(name_or_index)
  if name_or_index.is_a?(Integer)
    @columns[@column_names[name_or_index]]
  elsif name_or_index.is_a?(String) || name_or_index.is_a?(Symbol)
    @columns[name_or_index.to_s]
  end
end
|
124
|
+
|
125
|
+
# Forwards #template, with all arguments and the block, to the first
# column of the frame.
def template(*args, &block)
  _first_name, first_column = @columns.first
  first_column.template(*args, &block)
end
|
128
|
+
|
129
|
+
# Returns one row as an array (via to_ca), cells in @column_names order.
# With a row index, +idx+ is first translated through @row_index.search;
# otherwise it is used as the address directly.
def row(idx)
  address = @row_index ? @row_index.search(idx) : idx
  cells = @column_names.map { |name| @columns[name][address] }
  cells.to_ca
end
|
137
|
+
|
138
|
+
# Returns a new data frame restricted to the given rows and, optionally,
# columns.
#
# row - row selector; a bare Integer is wrapped in an Array first
# col - optional column selector applied to the column-name array; a bare
#       Integer is wrapped in an Array first
#
# The two branches of the previous version built the result identically,
# and the column branch computed an unused `values` local; both are
# folded into one code path here.
def [](row, col = nil)
  row = [row] if row.is_a?(Integer)
  if col
    col = [col] if col.is_a?(Integer)
    keys = @column_names.to_ca[col].to_a
  else
    keys = @column_names
  end
  new_columns = {}
  keys.each { |key| new_columns[key] = @columns[key][row] }
  CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
end
|
161
|
+
|
162
|
+
# Calls fill(value) on each named column (the last argument is the
# value, everything before it a column name) and returns self.
def fill(*names, value)
  names.each { |name| @columns[name.to_s].fill(value) }
  self
end
|
168
|
+
|
169
|
+
# Runs the block through a fresh Arranger bound to this frame and
# returns the Arranger's result.
def arrange(&block)
  arranger = Arranger.new(self)
  arranger.arrange(&block)
end
|
172
|
+
|
173
|
+
# Renames column name1 to name2 in place.
# Raises RuntimeError when name1 is not a known column.
def rename(name1, name2)
  old_key = name1.to_s
  position = @column_names.index(old_key)
  raise "unknown column name #{name1}" unless position
  new_key = name2.to_s
  @column_names[position] = new_key
  moved = @columns[old_key]
  @columns.delete(old_key)
  @columns[new_key] = moved
end
|
183
|
+
|
184
|
+
# Lower-cases every column name in place (both the name list and the
# column Hash keys) and returns self.
def downcase
  lowered = @column_names.map { |name| name.downcase }
  @columns = @column_names.zip(lowered).each_with_object({}) do |(old_name, new_name), acc|
    acc[new_name] = @columns[old_name]
  end
  @column_names = lowered
  self
end
|
195
|
+
|
196
|
+
# Returns a new data frame with one more column placed after the
# existing ones.
#
# name       - name of the added column
# new_column - column data; when omitted, the block (evaluated with
#              instance_exec on this frame) supplies it, and without a
#              block an object-typed template of the first column is used
def append(name, new_column = nil, &block)
  new_column ||= block ? instance_exec(&block) : @columns.first[1].template(:object)
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  extended = @column_names.each_with_object({}) do |key, acc|
    acc[key] = @columns[key]
  end
  extended[name.to_s] = new_column
  CADataFrame.new(extended, @row_index)
end
|
214
|
+
|
215
|
+
# Returns a new data frame with one more column placed before the
# existing ones.
#
# name       - name of the added column
# new_column - column data; when omitted, the block (evaluated with
#              instance_exec on this frame) supplies it, and without a
#              block an object-typed template of the first column is used
def lead(name, new_column = nil, &block)
  new_column ||= block ? instance_exec(&block) : @columns.first[1].template(:object)
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  extended = { name.to_s => new_column }
  @column_names.each { |key| extended[key] = @columns[key] }
  CADataFrame.new(extended, @row_index)
end
|
233
|
+
|
234
|
+
# Returns a new data frame with the same column names, each backed by a
# zero-length object array.
def vacant_copy
  empties = @column_names.each_with_object({}) do |key, acc|
    acc[key] = CArray.object(0)
  end
  CADataFrame.new(empties)
end
|
241
|
+
|
242
|
+
# Instance-level shortcut for CADataFrame.merge with self as the first
# argument.
def merge(*args)
  CADataFrame.merge(self, *args)
end
|
245
|
+
|
246
|
+
|
247
|
+
# Evaluates the block with this frame as self (instance_exec) and
# returns the block's value.
def execute(&block)
  instance_exec(&block)
end
|
250
|
+
|
251
|
+
# Reduces every column to a single value and returns a one-row data
# frame whose row index is [label].
#
# With a block, the block is called with (name, column) in the caller's
# context; without one, the method named by +label+ is sent to each
# column. A column whose evaluation raises is recorded as UNDEF.
def calculate(label, &block)
  results = {}
  @column_names.each do |name|
    results[name] =
      begin
        [block ? yield(name, @columns[name]) : @columns[name].send(label.intern)]
      rescue
        [UNDEF]
      end
  end
  CADataFrame.new(results, [label])
end
|
266
|
+
|
267
|
+
# Builds a new data frame from the block's result for each column.
# The block receives (name, column). Columns for which the block raises
# are left out of the result.
def resample(&block)
  sampled = @column_names.each_with_object({}) do |name, acc|
    begin
      acc[name] = yield(name, @columns[name])
    rescue
      # best effort: a failing column is simply skipped
    end
  end
  CADataFrame.new(sampled)
end
|
277
|
+
|
278
|
+
# Returns a new data frame containing the named columns (all columns
# when no name is given), restricted to the rows selected by the block.
# The block is evaluated with instance_exec; without a block the row
# selector is nil.
def select(*names, &block)
  names = @column_names if names.empty?
  row = block ? instance_exec(&block) : nil
  picked = names.map(&:to_s).each_with_object({}) do |name, acc|
    acc[name] = @columns[name][row]
  end
  CADataFrame.new(picked, @row_index ? @row_index[row] : nil)
end
|
293
|
+
|
294
|
+
# Returns a new data frame without the named columns. With no names the
# receiver itself is returned.
def eliminate(*names)
  return self if names.empty?
  dropped = names.map(&:to_s)
  kept = {}
  @column_names.each do |name|
    kept[name] = @columns[name] unless dropped.include?(name)
  end
  CADataFrame.new(kept, @row_index)
end
|
307
|
+
|
308
|
+
# Re-aligns the frame to +reference+: the key column is matched against
# the reference, every other column (and the row index, if any) is
# projected through the resulting address array, and the key column of
# the new frame is then overwritten with the reference values.
def matchup(keyname, reference)
  idx = reference.matchup(@columns[keyname.to_s])
  aligned = @column_names.each_with_object({}) do |name, acc|
    acc[name] = name == keyname ? reference : @columns[name].project(idx)
  end
  new_row_index = @row_index ? @row_index.project(idx).unmask(nil) : nil
  CADataFrame.new(aligned, new_row_index) {
    self.send(keyname)[] = reference
  }
end
|
328
|
+
|
329
|
+
# Returns a new data frame whose columns (and row index, if any) are
# indexed by the value of the block, which is evaluated with
# instance_exec on this frame.
def reorder(&block)
  index = instance_exec(&block)
  shuffled = @column_names.each_with_object({}) do |name, acc|
    acc[name] = @columns[name][index]
  end
  CADataFrame.new(shuffled, @row_index ? @row_index[index] : nil)
end
|
337
|
+
|
338
|
+
# Reorders the rows by the addresses CA.sort_addr computes for the sort
# keys. Keys are the named columns, or -- when no name is given -- the
# value of the block (a CArray or an Array of them), evaluated with
# instance_exec.
def order_by(*names, &block)
  list =
    if names.empty?
      if block
        result = instance_exec(&block)
        case result
        when CArray then [result]
        when Array  then result
        end
      end
    else
      @columns.values_at(*names.map { |s| s.to_s })
    end
  reorder { CA.sort_addr(*list) }
end
|
354
|
+
|
355
|
+
# Returns a new data frame with every column, and the row index if
# present, reversed.
def reverse
  flipped = @column_names.each_with_object({}) do |name, acc|
    acc[name] = @columns[name].reverse
  end
  CADataFrame.new(flipped, @row_index ? @row_index.reverse : nil)
end
|
362
|
+
|
363
|
+
# Returns the transposed data frame. The old column names become the
# row index. New column names come from +header+ if given, else from the
# stringified row index, else from the sequence "a", "b", ...
def transpose(header = nil)
  new_names =
    if header
      header.map(&:to_s)
    elsif @row_index
      @row_index.convert(:object) { |v| v.to_s }
    else
      CArray.object(@row_number).seq("a", :succ)
    end
  CADataFrame.new(ca.transpose, @column_names.to_ca, new_names)
end
|
375
|
+
|
376
|
+
# Counts the values of column +name+.
#
# Without +scale+ the frame is grouped by the column and valid entries
# are counted per group. With +scale+ an integer CAHistogram is filled
# and returned as a frame holding bin midpoints, left/right edges and
# counts.
def histogram(name, scale = nil, options = nil)
  if scale.nil?
    return group_by(name).table{ { :count => col(name).count_valid } }
  end
  hist = options ? CAHistogram.int(scale, options) : CAHistogram.int(scale)
  hist.increment(@columns[name.to_s])
  bins = {
    name.to_s   => hist.midpoints[0],
    "#{name}_L" => scale[0..-2],
    "#{name}_R" => scale.shift(-1)[0..-2],
    :count      => hist[0..-2].to_ca,
  }
  CADataFrame.new(bins)
end
|
395
|
+
|
396
|
+
# Classifies the values of column +name+ and returns a frame with the
# columns "<name>_M", "<name>_L", "<name>_R" and "<name>_CLASS".
#
# Without +scale+ every distinct value is its own class: the class number
# is the value's position in column.uniq and M/L/R all hold the distinct
# values. With +scale+ the values are binned with scale.bin, and M/L/R
# hold the midpoint and edges of each element's bin.
#
# opt - overrides for :include_upper (false), :include_lowest (true) and
#       :offset (0); used only when +scale+ is given.
#
# Fix: the scale-less branch called `columns.convert`, i.e. it went through
# the attr_reader returning the column Hash, which has no #convert. It now
# converts the selected column.
def classify(name, scale = nil, opt = {})
  column = @columns[name.to_s]
  if not scale
    mids = column.uniq
    mapper = {}
    mids.each_with_index do |v, i|
      mapper[v] = i
    end
    cls = column.convert(:int32) {|v| mapper[v] }
    hash = {
      "#{name}_M" => mids,
      "#{name}_L" => mids,
      "#{name}_R" => mids,
      "#{name}_CLASS" => cls
    }
  else
    option = {
      :include_upper => false,
      :include_lowest => true,
      :offset => 0,
    }.update(opt)
    cls = scale.bin(column,
                    option[:include_upper],
                    option[:include_lowest],
                    option[:offset])
    mids = ((scale + scale.shifted(-1))/2)[0..-2].to_ca
    left = scale[0..-2]
    right = scale.shift(-1)[0..-2]
    hash = {
      "#{name}_M" => mids.project(cls).to_ca,
      "#{name}_L" => left.project(cls).to_ca,
      "#{name}_R" => right.project(cls).to_ca,
      "#{name}_CLASS" => cls
    }
  end
  return CADataFrame.new(hash)
end
|
434
|
+
|
435
|
+
# Returns a new data frame in which +suf+ has been appended to every
# column name; the column data and row index are reused.
def suffix(suf)
  renamed = @column_names.each_with_object({}) do |name, acc|
    acc[name.to_s + suf] = @columns[name]
  end
  CADataFrame.new(renamed, @row_index)
end
|
443
|
+
|
444
|
+
# Wraps the named columns (all columns when no name is given) in a
# CADFArray.
def ca(*names)
  selected = names.empty? ? @column_names : names.map(&:to_s)
  CADFArray.new(selected, @columns)
end
|
451
|
+
|
452
|
+
# Same column selection as #ca, followed by to_ca on the result.
def to_ca(*names)
  view = ca(*names)
  view.to_ca
end
|
455
|
+
|
456
|
+
# Builds a Hash from two columns: CArray.join pairs them up, and the
# resulting pairs are converted with to_a.to_h (name1 supplies the keys,
# name2 the values).
def to_hash(name1, name2)
  key_column   = @columns[name1.to_s]
  value_column = @columns[name2.to_s]
  pairs = CArray.join([key_column, value_column])
  pairs.to_a.to_h
end
|
459
|
+
|
460
|
+
# Renders the frame as a plain-text table.
#
# rowmax - :full (default) prints every row; an Integer smaller than the
#          row count prints the leading and trailing rows with a "..."
#          line in between.
#
# When a row index exists it is shown as a leading, unnamed column.
# Two layouts exist: a fast one for pure-ASCII content and one that pads
# by display width (via __strwidth__) when any name or cell is non-ASCII.
#
# Fixes over the previous version:
#   * In the non-ASCII layout the per-cell padding was computed from
#     `datawidth` even when the table had no data rows, where that local
#     was never assigned (nil). It is now computed only when data exists.
#   * Cells are joined with two spaces so that they line up with the
#     rules, which join their segments with "--".
def ascii_table(rowmax = :full)
  if @row_index
    namelist = [""] + @column_names
    tbl = CADFArray.new(namelist, @columns.clone.update("" => @row_index))
  else
    namelist = @column_names
    tbl = to_ca
  end
  if rowmax.is_a?(Integer) and @row_number > rowmax
    list = tbl[0..(rowmax/2),nil].to_a
    list.push(namelist.map { "..." })
    list.push(*(tbl[-rowmax/2+1..-1,nil].to_a))
    tbl = list.to_ca
  end
  datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
  datamb  = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
  namemb  = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
  mb      = datamb.or(namemb)
  namelen = namelist.map(&:length).to_ca
  datalen = datastr.convert(&:length)
  if mb.max == 0
    # ASCII-only layout: character count equals display width.
    if datalen.size == 0
      lengths = namelen.to_a
    else
      lengths = datalen.max(0).pmax(namelen).to_a
    end
    hrule  = "-" + lengths.map {|len| "-"*len}.join("--") + "-"
    header = " " +
             [namelist, lengths].transpose.map{|name, len|
               "#{name.to_s.ljust(len)}" }.join("  ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_index do |blk, i|
        list = blk.flatten.to_a
        ary << " " + [list, lengths].transpose.map{|value, len|
                 "#{value.ljust(len)}"}.join("  ") + " "
      end
    end
    ary << hrule
    return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
  else
    # Wide-character layout: pad by display width instead of length.
    namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
    if datalen.size == 0
      maxwidth = namewidth
      len = nil   # no data cells, so no per-cell padding is needed
    else
      datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
      maxwidth  = datawidth.max(0).pmax(namewidth)
      # ljust counts characters, so widen the target by the difference
      # between a cell's character count and its display width.
      len = maxwidth[:*,nil] - datawidth + datalen
    end
    hrule  = "-" + maxwidth.map {|w| "-"*w}.join("--") + "-"
    header = " " +
             [namelist, maxwidth.to_a].transpose.map{|name, w|
               "#{name.to_s.ljust(w-__strwidth__(name.to_s)+name.to_s.length)}" }.join("  ") + " "
    ary = [hrule, header, hrule]
    if datalen.size > 0
      datastr[:i,nil].each_with_addr do |blk, i|
        list = blk.flatten.to_a
        ary << " " + list.map.with_index {|value, j|
                 "#{value.ljust(len[i,j])}"}.join("  ") + " "
      end
    end
    ary << hrule
    return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
  end
end
|
525
|
+
|
526
|
+
# Cell formatter used by #ascii_table: Floats are printed with "%.6g",
# every other object with its own to_s.
def __obj_to_string__(obj)
  return "%.6g" % obj if obj.is_a?(Float)
  obj.to_s
end
|
534
|
+
|
535
|
+
# Estimates the display width of a string: characters encoded in more
# than one byte count as two columns, all others as one. Pure-ASCII
# strings take the shortcut of returning their length.
def __strwidth__(string)
  return string.length if string.ascii_only?
  string.each_char.reduce(0) do |width, char|
    width + (char.bytesize > 1 ? 2 : 1)
  end
end
|
542
|
+
|
543
|
+
# Short console representation: the ASCII table capped at 10 rows.
def inspect
  ascii_table(10)
end
|
546
|
+
|
547
|
+
# Full string representation: the ASCII table with its default row limit.
def to_s
  ascii_table
end
|
550
|
+
|
551
|
+
# Implicit Array conversion: a one-element Array holding #to_s.
def to_ary
  [to_s]
end
|
554
|
+
|
555
|
+
# Serializes the frame as CSV text: one header line with the column
# names, then the table's own to_csv output.
#
# with_row_index - when true and a row index exists, the index is written
#                  as a leading column with an empty header cell.
def to_csv(with_row_index: true)
  if @row_index && with_row_index
    namelist = [""] + @column_names
    tbl = CADFArray.new(namelist, @columns.clone.update("" => @row_index))
  else
    namelist = @column_names
    tbl = ca
  end
  header = namelist.map(&:to_s).join(",")
  [header, tbl.to_csv].join("\n")
end
|
568
|
+
|
569
|
+
# Writes the frame to an .xlsx file through the axlsx gem (loaded lazily
# so the dependency is only needed when this method is used).
#
# filename       - path handed to Axlsx::Package#serialize
# sheet_name     - worksheet name
# with_row_index - when true and a row index exists, the index is written
#                  as a leading column
# block          - optional; receives the worksheet after all rows have
#                  been added, before the file is written
#
# Fix: the header row now gets a leading empty cell whenever the data rows
# carry the row index, matching what #to_csv does. Previously the header
# was one cell shorter than the rows and every title sat one column left
# of its data.
def to_xlsx(filename, sheet_name: 'Sheet1', with_row_index: true, &block)
  require "axlsx"
  xl = Axlsx::Package.new
  xl.use_shared_strings = true
  sheet = xl.workbook.add_worksheet(name: sheet_name)
  header = (with_row_index && @row_index) ? [""] + column_names : column_names
  sheet.add_row(header)
  each_row(with_row_index: with_row_index) do |list|
    sheet.add_row(list)
  end
  yield sheet if block_given?
  xl.serialize(filename)
end
|
583
|
+
|
584
|
+
# Lets columns be read as zero-argument methods. Lookup order:
#   1. a column with exactly that name,
#   2. a column whose name has "." where the method has "_" (R-style names),
#   3. an alias registered through #method.
# Anything else is passed on to the default handler.
#
# Fix: the method name is no longer overwritten with its String form.
# A bare `super` forwards the current value of the parameters, so the old
# code handed a String to the default method_missing, which raised
# ArgumentError instead of the expected NoMethodError/NameError.
def method_missing(name, *args)
  if args.size == 0
    key = name.to_s
    if @column_names.include?(key)
      return @columns[key]
    elsif @column_names.include?(key.gsub(/_/,'.')) ### For R
      return @columns[key.gsub(/_/,'.')]
    elsif @__methods__.include?(key)
      return @columns[@__methods__[key]]
    end
  end
  super
end
|
597
|
+
|
598
|
+
end
|
599
|
+
|
600
|
+
#############################################################
|
601
|
+
#
|
602
|
+
# ARRANGER
|
603
|
+
#
|
604
|
+
#############################################################
|
605
|
+
|
606
|
+
|
607
|
+
class CADataFrame
|
608
|
+
|
609
|
+
class Arranger
|
610
|
+
|
611
|
+
# Remembers the data frame that the arranging DSL will modify.
def initialize(dataframe)
  @dataframe = dataframe
end
|
614
|
+
|
615
|
+
# Evaluates the block with the arranger as self, then hands back the
# data frame being arranged.
def arrange(&block)
  instance_exec(&block)
  @dataframe
end
|
619
|
+
|
620
|
+
private
|
621
|
+
|
622
|
+
# Delegates to the data frame's column_names.
def column_names
  @dataframe.column_names
end
|
625
|
+
|
626
|
+
# Delegates to the data frame's row_number.
def row_number
  @dataframe.row_number
end
|
629
|
+
|
630
|
+
# Registers column aliases on the data frame (see the frame's #method).
def method(hash)
  frame = @dataframe
  frame.method(hash)
end
|
633
|
+
|
634
|
+
# Replaces column +name+ with the result of its strptime(fmt) call.
def timeseries(name, fmt = "%Y-%m-%d %H:%M:%S")
  key = name.to_s
  columns = @dataframe.columns
  columns[key] = columns[key].strptime(fmt)
end
|
637
|
+
|
638
|
+
# Converts column +name+ to data type +type+ (via to_type) and, when a
# +mask+ value is given, masks that value out of the converted column.
#
# mask - value passed to maskout!; the default :novalue means "do not mask"
#
# Fix: the mask step referred to `options[:maskout]`, but no `options`
# exists in this method, so passing a mask always failed. The +mask+
# argument is used now.
def type(type, name, mask = :novalue)
  key = name.to_s
  @dataframe.columns[key] = @dataframe.columns[key].to_type(type)
  if mask != :novalue
    @dataframe.columns[key].maskout!(mask)
  end
end
|
644
|
+
|
645
|
+
# Removes the named columns from the data frame in place.
# With no names the arranger itself is returned; otherwise the frame's
# (updated) column-name list is returned, as before.
#
# Fix: the previous version deleted from column_names while iterating
# over that same array, which makes the iteration skip the element
# following each deletion -- adjacent columns were not all removed.
def eliminate(*names)
  if names.empty?
    return self
  end
  names.map(&:to_s).each do |name|
    next unless @dataframe.column_names.include?(name)
    @dataframe.columns.delete(name)
    @dataframe.column_names.delete(name)
  end
  @dataframe.column_names
end
|
657
|
+
|
658
|
+
# Delegates to the data frame's template, arguments and block included.
def template(*args, &block)
  @dataframe.template(*args, &block)
end
|
661
|
+
|
662
|
+
# Converts each named column to :double. Names may be nested in Arrays.
def double(*names)
  names.flatten.each { |name| type(:double, name) }
end
|
667
|
+
|
668
|
+
# Converts each named column to :int. Names may be nested in Arrays.
def int(*names)
  names.flatten.each { |name| type(:int, name) }
end
|
673
|
+
|
674
|
+
# Calls maskout!(value) on each named column. Names may be nested in
# Arrays.
def maskout(value, *names)
  columns = @dataframe.columns
  names.flatten.each { |name| columns[name.to_s].maskout!(value) }
end
|
679
|
+
|
680
|
+
# Calls unmask(value) on each named column; the return values of those
# calls are not stored. Names may be nested in Arrays.
def unmask(value, *names)
  columns = @dataframe.columns
  names.flatten.each { |name| columns[name.to_s].unmask(value) }
end
|
685
|
+
|
686
|
+
# Delegates the column lookup to the data frame.
def col(name)
  @dataframe.col(name)
end
|
689
|
+
|
690
|
+
# Adds a column at the end of the data frame, in place.
#
# new_column - column data; a falsy value is replaced by an object-typed
#              template of the first column, and anything that is not a
#              CArray is converted with to_ca.
def append(name, new_column)
  new_column ||= @dataframe.columns.first[1].template(:object)
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  key = name.to_s
  @dataframe.columns[key] = new_column
  @dataframe.column_names.push(key)
end
|
702
|
+
|
703
|
+
# Stores +new_column+ under +name+ and inserts the name at the front of the
# column-name list.
#
# name       - column name (converted with to_s).
# new_column - column data. A false/nil value is replaced by
#              +template(:object)+ of the first stored column; anything
#              that is not a CArray is converted with +to_ca+.
#
# Returns the column-name list.
def lead(name, new_column)
  new_column ||= @dataframe.columns.first[1].template(:object)
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  key = name.to_s
  @dataframe.columns[key] = new_column
  @dataframe.column_names.unshift(key)
end
|
715
|
+
|
716
|
+
# Renames column +name1+ to +name2+: the entry in the column-name list is
# overwritten in place and the column data is moved to the new key.
#
# Raises RuntimeError when +name1+ is not in the column-name list.
# Returns the moved column.
def rename(name1, name2)
  old_key = name1.to_s
  position = @dataframe.column_names.index(old_key)
  raise "unknown column name #{name1}" unless position

  new_key = name2.to_s
  @dataframe.column_names[position] = new_key
  moved = @dataframe.columns[old_key]
  @dataframe.columns.delete(old_key)
  @dataframe.columns[new_key] = moved
end
|
726
|
+
|
727
|
+
# Delegates to the wrapped data frame's +downcase+ and returns its result.
def downcase
  frame = @dataframe
  frame.downcase
end
|
730
|
+
|
731
|
+
# Delegates to the wrapped data frame's +classify+ with the same three
# arguments and returns its result.
def classify(name, scale, opt = {})
  @dataframe.classify(name, scale, opt)
end
|
734
|
+
|
735
|
+
def map (mapper, name_or_column)
|
736
|
+
case name_or_column
|
737
|
+
when String, Symbol
|
738
|
+
name = name_or_column
|
739
|
+
column = @dataframe.columns[name.to_s]
|
740
|
+
when CArray
|
741
|
+
column = name_or_column
|
742
|
+
when Array
|
743
|
+
column = name_or_column.to_ca
|
744
|
+
else
|
745
|
+
raise "invalid argument"
|
746
|
+
end
|
747
|
+
case mapper
|
748
|
+
when Hash
|
749
|
+
return column.convert(:object) {|v| hash[v] }
|
750
|
+
when CArray
|
751
|
+
return mapper.project(column)
|
752
|
+
when Array
|
753
|
+
return mapper.to_ca.project(column)
|
754
|
+
end
|
755
|
+
end
|
756
|
+
|
757
|
+
# Lets a zero-argument call name a column: a name found in the data
# frame's column_names returns that column, and a name registered in the
# data frame's +__methods__+ table returns the column it points to.
# Every other call falls through to the default behaviour.
def method_missing(name, *args)
  if args.empty?
    key = name.to_s
    return @dataframe.columns[key] if @dataframe.column_names.include?(key)

    if @dataframe.__methods__.include?(key)
      return @dataframe.columns[@dataframe.__methods__[key]]
    end
  end
  super
end

# Keeps respond_to? consistent with method_missing by reporting the same
# column names and +__methods__+ entries as answerable.
def respond_to_missing?(name, include_private = false)
  # The hook can be consulted before a data frame is attached.
  return super if @dataframe.nil?

  key = name.to_s
  @dataframe.column_names.include?(key) ||
    @dataframe.__methods__.include?(key) ||
    super
end
|
767
|
+
|
768
|
+
end
|
769
|
+
|
770
|
+
end
|
771
|
+
|
772
|
+
#############################################################
|
773
|
+
#
|
774
|
+
# Class methods
|
775
|
+
#
|
776
|
+
#############################################################
|
777
|
+
|
778
|
+
class CADataFrame

  # Loads a table through CArray.load_sqlite3, converts it with
  # to_dataframe and runs +maskout nil+ over every column via +arrange+.
  def self.load_sqlite3(*args)
    frame = CArray.load_sqlite3(*args).to_dataframe
    frame.arrange { maskout nil, *column_names }
  end

  # Converts this frame to a CArray (self.ca.to_ca), attaches
  # CA::TableMethods and the column names, then forwards +args+ to that
  # array's to_sqlite3.
  def to_sqlite3(*args)
    table = self.ca.to_ca
    table.extend(CA::TableMethods)
    table.column_names = column_names
    table.to_sqlite3(*args)
  end

  # Same pipeline as load_sqlite3, starting from CArray.load_csv.
  def self.load_csv(*args, &block)
    frame = CArray.load_csv(*args, &block).to_dataframe
    frame.arrange { maskout nil, *column_names }
  end

  # Same pipeline as load_sqlite3, starting from CArray.from_csv.
  def self.from_csv(*args, &block)
    frame = CArray.from_csv(*args, &block).to_dataframe
    frame.arrange { maskout nil, *column_names }
  end

  # Builds one frame holding the columns of every given frame. When two
  # frames share a column name the later frame's column wins. The row
  # index is taken from the first frame.
  def self.merge(*args)
    merged = {}
    args.each do |table|
      table.column_names.each { |name| merged[name] = table.col(name) }
    end
    CADataFrame.new(merged, args.first.row_index)
  end

  # Binds the given frames together along dimension 0, column by column,
  # using the first frame's column names and each first column's data
  # type. Row indexes are joined only when every frame has one; otherwise
  # the result has no row index.
  def self.concat(*args)
    joined = {}
    args.first.column_names.each do |name|
      parts = args.map { |table| table.col(name) }
      joined[name] = CArray.bind(parts.first.data_type, parts, 0)
    end
    indexes = args.map(&:row_index)
    row_index = indexes.all? ? CArray.join(*indexes) : nil
    CADataFrame.new(joined, row_index)
  end

end
|
829
|
+
|
830
|
+
#############################################################
|
831
|
+
#
|
832
|
+
# CADFArray
|
833
|
+
#
|
834
|
+
#############################################################
|
835
|
+
|
836
|
+
class CADFArray < CAObject # :nodoc:

  attr_reader :column_names

  # column_names - ordered list of column names.
  # columns      - lookup from column name to column data.
  #
  # Creates a read-only :object array whose dimensions are
  # [size of the first column, number of columns], then calls
  # __create_mask__.
  def initialize(column_names, columns)
    @column_names = column_names
    @columns = columns
    row_count = @columns[@column_names.first].size
    extend CA::TableMethods
    super(:object, [row_count, @column_names.size], :read_only=>true)
    __create_mask__
  end

  # Returns the element at idx = [row, column position], read from the
  # column named at that position.
  def fetch_index(idx)
    row, pos = *idx
    @columns[@column_names[pos]][row]
  end

  # Writes each column's +value+ into the matching column slot of +data+.
  def copy_data(data)
    @column_names.each_with_index do |name, pos|
      data[nil, pos] = @columns[name].value
    end
  end

  # Intentionally empty.
  def create_mask
  end

  # Returns the mask element at idx = [row, column position], or 0 when
  # that column has no mask.
  def mask_fetch_index(idx)
    row, pos = *idx
    column = @columns[@column_names[pos]]
    column.has_mask? ? column.mask[row] : 0
  end

  # Writes the mask of every masked column into the matching column slot
  # of +data+; columns without a mask are skipped.
  def mask_copy_data(data)
    @column_names.each_with_index do |name, pos|
      column = @columns[name]
      next unless column.has_mask?

      data[nil, pos] = column.mask
    end
  end

end
|
883
|
+
|
884
|
+
|
885
|
+
#############################################################
|
886
|
+
#
|
887
|
+
# GROUPING
|
888
|
+
#
|
889
|
+
#############################################################
|
890
|
+
|
891
|
+
class CADataFrame

  # Groups this frame by the given column specs. Exactly one spec yields a
  # CADataFrameGroup; any other count yields a CADataFrameGroupMulti.
  def group_by(*names)
    return CADataFrameGroupMulti.new(self, *names) unless names.size == 1

    CADataFrameGroup.new(self, names[0])
  end

end
|
902
|
+
|
903
|
+
class CADataFrameGroup

  # dataframe - frame to group.
  # name      - column name, or a one-entry Hash {name => keys} that fixes
  #             the group keys instead of deriving them from the column's
  #             sorted unique values.
  #
  # Records, for every key, the addresses where the column equals it.
  def initialize(dataframe, name)
    @dataframe = dataframe
    explicit = name.is_a?(Hash)
    name, list = name.first if explicit
    @column = @dataframe.col(name)
    @keys = explicit ? list.to_ca : @column.uniq.sort
    @name = name.to_s
    @addrs = {}
    @keys.each { |key| @addrs[key] = @column.eq(key).where }
  end

  # Runs +execute(&block)+ on each group's sub-frame and assembles the
  # returned hashes into a frame with one row per key: the key column
  # first, then one column per hash key.
  def table(&block)
    results = []
    @keys.each { |key| results << @dataframe[@addrs[key]].execute(&block) }
    columns = {@name=>@keys}
    results.each_with_index do |row, i|
      row.each do |label, value|
        (columns[label] ||= [])[i] = value
      end
    end
    CADataFrame.new(columns)
  end

  # Reduces every non-key column per group. With a block, the block gets
  # (column name, group slice); otherwise the method named by +label+ is
  # sent to the slice. A cell whose computation raises is left as UNDEF.
  def calculate(label, &block)
    new_columns = {@name=>@keys}
    @dataframe.each_column do |name, column|
      next if name == @name

      cells = new_columns[name] = CArray.object(@keys.size) { UNDEF }
      @keys.each_with_index do |key, i|
        begin
          cells[i] = if block
                       yield(name, column[@addrs[key]])
                     else
                       column[@addrs[key]].send(label.intern)
                     end
        rescue
          # best effort: leave the cell as UNDEF
        end
      end
    end
    CADataFrame.new(new_columns)
  end

  # Returns the sub-frame for +group_value+, or a vacant copy of the frame
  # when there is no address entry for that value.
  def [](group_value)
    addr = @addrs[group_value]
    addr ? @dataframe[addr] : @dataframe.vacant_copy
  end

end
|
969
|
+
|
970
|
+
class CADataFrameGroupMulti

  # dataframe - frame to group.
  # names     - one spec per grouping column: a column name, or a
  #             one-entry Hash {name => keys} that fixes the keys instead
  #             of deriving them from the column's sorted unique values.
  #
  # Records, for every combination of keys, the addresses where all
  # grouping columns equal that combination.
  def initialize(dataframe, *names)
    @rank = names.size
    @dataframe = dataframe
    @names = []
    @column = []
    @keys = []
    names.each_with_index do |name, i|
      case name
      when Hash
        name, list = name.first
        @column[i] = @dataframe.col(name)
        @keys[i] = list.to_ca
      else
        @column[i] = @dataframe.col(name)
        @keys[i] = @column[i].to_ca.uniq.sort
      end
      # Store names as Strings, as CADataFrameGroup does, so that +table+
      # does not mix Symbol and String column keys.
      @names[i] = name.to_s
    end
    @addrs = {}
    each_with_keys do |list|
      flag = @column[0].eq(list[0])
      (1...@rank).each do |i|
        flag &= @column[i].eq(list[i])
      end
      @addrs[list] = flag.where
    end
  end

  # Iterates over every key combination (the product of the per-column
  # key lists). Without a block this returns an enumerator.
  def each_with_keys(&block)
    @keys[0].to_a.product(*@keys[1..-1].map(&:to_a)).each(&block)
  end

  # Runs +execute(&block)+ on each combination's sub-frame and assembles
  # the returned hashes into a frame with one row per combination: the
  # grouping columns first, then one column per hash key.
  def table(&block)
    hashpool = []
    each_with_keys do |list|
      hashpool << @dataframe[@addrs[list]].execute(&block)
    end
    columns = {}
    @names.each do |name|
      columns[name] = []
    end
    each_with_keys.with_index do |list, j|
      @names.each_with_index do |name, i|
        columns[name][j] = list[i]
      end
    end
    hashpool.each_with_index do |hash, i|
      hash.each do |key, value|
        columns[key] ||= []
        columns[key][i] = value
      end
    end
    return CADataFrame.new(columns)
  end

  # Returns the sub-frame for the key combination +group_value+, or a
  # vacant copy of the frame when there is no address entry for it.
  def [](group_value)
    if map = @addrs[group_value]
      return @dataframe[map]
    else
      return @dataframe.vacant_copy
    end
  end

  # Yields each key combination together with its sub-frame.
  def each
    each_with_keys do |key|
      yield key, @dataframe[@addrs[key]]
    end
  end

end
|
1042
|
+
|
1043
|
+
|
1044
|
+
#############################################################
|
1045
|
+
#
|
1046
|
+
# PIVOT TABLE
|
1047
|
+
#
|
1048
|
+
#############################################################
|
1049
|
+
|
1050
|
+
class CADataFrame

  # Builds a CADataFramePivot over this frame for the two column specs.
  def pivot(name1, name2)
    CADataFramePivot.new(self, name1, name2)
  end

end
|
1057
|
+
|
1058
|
+
class CADataFramePivot

  # dataframe    - frame to pivot.
  # name1, name2 - column specs for the row and column axes: a column
  #                name, or a one-entry Hash {name => keys} fixing the
  #                keys. Explicit row keys are converted with to_ca;
  #                explicit column keys are used as given. Otherwise keys
  #                are the column's sorted unique values.
  #
  # Records, for every (row key, column key) pair, the addresses where
  # both columns match.
  def initialize(dataframe, name1, name2)
    @dataframe = dataframe

    explicit1 = name1.is_a?(Hash)
    name1, list1 = name1.first if explicit1
    @column1 = @dataframe.col(name1)
    @keys1 = explicit1 ? list1.to_ca : @column1.uniq.sort

    explicit2 = name2.is_a?(Hash)
    name2, list2 = name2.first if explicit2
    @column2 = @dataframe.col(name2)
    @keys2 = explicit2 ? list2 : @column2.uniq.sort

    @addrs = {}
    @keys1.each do |row_key|
      @keys2.each do |col_key|
        hits = @column1.eq(row_key) & @column2.eq(col_key)
        @addrs[[row_key, col_key]] = hits.where
      end
    end
  end

  # Builds a frame with one column per column key and one row per row
  # key; each cell is +execute(&block)+ of the matching sub-frame. Cells
  # start as UNDEF and the row keys become the row index.
  def table(&block)
    columns = {}
    @keys2.each do |col_key|
      columns[col_key] = CArray.object(@keys1.size) { UNDEF }
    end
    @keys1.each_with_index do |row_key, i|
      @keys2.each do |col_key|
        subset = @dataframe[@addrs[[row_key, col_key]]]
        columns[col_key][i] = subset.execute(&block)
      end
    end
    CADataFrame.new(columns, @keys1)
  end

end
|
1102
|
+
|
1103
|
+
|
1104
|
+
|