carray 1.1.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/carray.gemspec +7 -2
- data/carray.h +1 -0
- data/carray_access.c +16 -3
- data/devel/guess_shape.rb +76 -0
- data/examples/ex001.rb +10 -0
- data/examples/test-int.rb +13 -0
- data/lib/carray/autoload/autoload_graphics_zimg.rb +3 -0
- data/lib/carray/base/autoload.rb +6 -1
- data/lib/carray/base/basic.rb +16 -6
- data/lib/carray/base/inspect.rb +4 -6
- data/lib/carray/base/struct.rb +1 -1
- data/lib/carray/graphics/zimg.rb +296 -0
- data/lib/carray/io/csv.rb +16 -9
- data/lib/carray/io/sqlite3.rb +7 -2
- data/test/test_ref_store.rb +1 -1
- data/version.h +4 -4
- metadata +82 -10
- data/ext/dataframe/API.txt +0 -11
- data/ext/dataframe/extconf.rb +0 -3
- data/ext/dataframe/lib/carray/autoload/autoload_dataframe_dataframe.rb +0 -14
- data/ext/dataframe/lib/carray/dataframe/dataframe.rb +0 -1104
- data/ext/dataframe/sample/test_uniq_sort.rb +0 -5
data/lib/carray/io/csv.rb
CHANGED
@@ -414,6 +414,9 @@ module CA
|
|
414
414
|
@table = CArray.merge(CA_OBJECT, table[nil].fields)
|
415
415
|
else
|
416
416
|
@names = table.instance_exec{ @names }
|
417
|
+
if @names.nil?
|
418
|
+
@names = table.instance_exec{ @column_names }
|
419
|
+
end
|
417
420
|
case
|
418
421
|
when table.rank > 2
|
419
422
|
@table = table.reshape(false,nil).object
|
@@ -451,7 +454,7 @@ module CA
|
|
451
454
|
end
|
452
455
|
|
453
456
|
# puts header
|
454
|
-
def header (list)
|
457
|
+
def header (list = @names)
|
455
458
|
@io.write list.map{|s| csv_quote(s)}.join(@sep)
|
456
459
|
@io.write(@rs)
|
457
460
|
end
|
@@ -464,25 +467,29 @@ module CA
|
|
464
467
|
|
465
468
|
# write value
|
466
469
|
# If option :strict is set, do csv_quote for string element
|
467
|
-
def body (
|
468
|
-
if
|
470
|
+
def body (strict: true, format: nil)
|
471
|
+
if strict
|
469
472
|
case @table.data_type
|
470
473
|
when CA_OBJECT
|
471
|
-
table = @table
|
474
|
+
table = @table.to_ca
|
472
475
|
table[:is_kind_of, String].map! { |s| csv_quote(s) }
|
473
476
|
when CA_FIXLEN
|
474
477
|
table = @table.object
|
475
478
|
table.map! { |s| csv_quote(s) }
|
476
479
|
else
|
477
|
-
table = @table.object
|
480
|
+
table = @table.object
|
478
481
|
end
|
482
|
+
else
|
483
|
+
table = @table
|
484
|
+
end
|
485
|
+
if format
|
479
486
|
table.dim0.times do |i|
|
480
|
-
@io.write table[i,nil].to_a
|
487
|
+
@io.write Kernel::format(format,*table[i,nil].to_a)
|
481
488
|
@io.write(@rs)
|
482
|
-
end
|
489
|
+
end
|
483
490
|
else
|
484
|
-
|
485
|
-
@io.write
|
491
|
+
table.dim0.times do |i|
|
492
|
+
@io.write table[i,nil].to_a.join(@sep)
|
486
493
|
@io.write(@rs)
|
487
494
|
end
|
488
495
|
end
|
data/lib/carray/io/sqlite3.rb
CHANGED
@@ -68,8 +68,13 @@ class CArray
|
|
68
68
|
insert.execute [self[i]]
|
69
69
|
end
|
70
70
|
else
|
71
|
-
|
72
|
-
|
71
|
+
begin
|
72
|
+
dim0.times do |i|
|
73
|
+
insert.execute self[i,nil].to_a
|
74
|
+
end
|
75
|
+
rescue
|
76
|
+
puts self[i,nil].to_a
|
77
|
+
raise $!
|
73
78
|
end
|
74
79
|
end
|
75
80
|
database.commit if transaction
|
data/test/test_ref_store.rb
CHANGED
@@ -155,7 +155,7 @@ class TestCArrayRefStore < Test::Unit::TestCase
|
|
155
155
|
a = CArray.int(3,3).seq!
|
156
156
|
assert_equal(CA_INT([4]), a[[4]])
|
157
157
|
assert_equal(CA_INT([0,1,2,3]), a[[0..3]])
|
158
|
-
assert_equal(
|
158
|
+
assert_equal(CARefer, a[nil].class)
|
159
159
|
assert_equal(CA_INT([0,1,2,3,4,5,6,7,8]), a[nil])
|
160
160
|
assert_raise(IndexError) { a[0..9] }
|
161
161
|
assert_raise(IndexError) { a[-10..-1] }
|
data/version.h
CHANGED
@@ -10,9 +10,9 @@
|
|
10
10
|
|
11
11
|
---------------------------------------------------------------------------- */
|
12
12
|
|
13
|
-
#define CA_VERSION "1.1.7"
|
14
|
-
#define CA_VERSION_CODE 117
|
13
|
+
#define CA_VERSION "1.1.8"
|
14
|
+
#define CA_VERSION_CODE 118
|
15
15
|
#define CA_VERSION_MAJOR 1
|
16
16
|
#define CA_VERSION_MINOR 1
|
17
|
-
#define CA_VERSION_TEENY 7
|
18
|
-
#define CA_VERSION_DATE "2016/05/
|
17
|
+
#define CA_VERSION_TEENY 8
|
18
|
+
#define CA_VERSION_DATE "2016/05/10"
|
metadata
CHANGED
@@ -1,15 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: carray
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.7
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hiroki Motoyoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
12
|
-
dependencies: []
|
11
|
+
date: 2016-08-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: narray
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.6.1.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.6.1.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: narray_miss
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.3'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.3'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: axlsx
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '2.0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '2.0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: spreadsheet
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.1'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.1'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sqlite3
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.3'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.3'
|
13
83
|
description: |2
|
14
84
|
CArray is a uniform multi-dimensional rectangular array class.
|
15
85
|
It provides the various types of sub-arrays and references
|
@@ -20,7 +90,6 @@ executables: []
|
|
20
90
|
extensions:
|
21
91
|
- extconf.rb
|
22
92
|
- ext/calculus/extconf.rb
|
23
|
-
- ext/dataframe/extconf.rb
|
24
93
|
- ext/fortio/extconf.rb
|
25
94
|
- ext/imagemap/extconf.rb
|
26
95
|
- ext/mathfunc/extconf.rb
|
@@ -84,17 +153,15 @@ files:
|
|
84
153
|
- carray_test.c
|
85
154
|
- carray_undef.c
|
86
155
|
- carray_utils.c
|
156
|
+
- devel/guess_shape.rb
|
157
|
+
- examples/ex001.rb
|
158
|
+
- examples/test-int.rb
|
87
159
|
- ext/calculus/carray_calculus.c
|
88
160
|
- ext/calculus/carray_interp.c
|
89
161
|
- ext/calculus/extconf.rb
|
90
162
|
- ext/calculus/lib/autoload/autoload_math_calculus.rb
|
91
163
|
- ext/calculus/lib/math/calculus.rb
|
92
164
|
- ext/calculus/lib/math/interp/adapter_interp1d.rb
|
93
|
-
- ext/dataframe/API.txt
|
94
|
-
- ext/dataframe/extconf.rb
|
95
|
-
- ext/dataframe/lib/carray/autoload/autoload_dataframe_dataframe.rb
|
96
|
-
- ext/dataframe/lib/carray/dataframe/dataframe.rb
|
97
|
-
- ext/dataframe/sample/test_uniq_sort.rb
|
98
165
|
- ext/fortio/extconf.rb
|
99
166
|
- ext/fortio/lib/carray/autoload/autoload_fortran_format.rb
|
100
167
|
- ext/fortio/lib/carray/io/fortran_format.rb
|
@@ -152,6 +219,7 @@ files:
|
|
152
219
|
- lib/carray.rb
|
153
220
|
- lib/carray/autoload/autoload_base.rb
|
154
221
|
- lib/carray/autoload/autoload_graphics_gnuplot.rb
|
222
|
+
- lib/carray/autoload/autoload_graphics_zimg.rb
|
155
223
|
- lib/carray/autoload/autoload_io_csv.rb
|
156
224
|
- lib/carray/autoload/autoload_io_excel.rb
|
157
225
|
- lib/carray/autoload/autoload_io_imagemagick.rb
|
@@ -173,6 +241,7 @@ files:
|
|
173
241
|
- lib/carray/base/serialize.rb
|
174
242
|
- lib/carray/base/struct.rb
|
175
243
|
- lib/carray/graphics/gnuplot.rb
|
244
|
+
- lib/carray/graphics/zimg.rb
|
176
245
|
- lib/carray/info.rb
|
177
246
|
- lib/carray/io/csv.rb
|
178
247
|
- lib/carray/io/excel.rb
|
@@ -264,13 +333,14 @@ licenses: []
|
|
264
333
|
metadata: {}
|
265
334
|
post_install_message:
|
266
335
|
rdoc_options:
|
267
|
-
- "--main
|
336
|
+
- "--main rdoc_main.rb"
|
268
337
|
- rdoc_main.rb
|
269
338
|
- rdoc_ext.rb
|
270
339
|
- rdoc_math.rb
|
271
340
|
- rdoc_stat.rb
|
272
341
|
- lib/carray/autoload/autoload_base.rb
|
273
342
|
- lib/carray/autoload/autoload_graphics_gnuplot.rb
|
343
|
+
- lib/carray/autoload/autoload_graphics_zimg.rb
|
274
344
|
- lib/carray/autoload/autoload_io_csv.rb
|
275
345
|
- lib/carray/autoload/autoload_io_excel.rb
|
276
346
|
- lib/carray/autoload/autoload_io_imagemagick.rb
|
@@ -292,6 +362,7 @@ rdoc_options:
|
|
292
362
|
- lib/carray/base/serialize.rb
|
293
363
|
- lib/carray/base/struct.rb
|
294
364
|
- lib/carray/graphics/gnuplot.rb
|
365
|
+
- lib/carray/graphics/zimg.rb
|
295
366
|
- lib/carray/info.rb
|
296
367
|
- lib/carray/io/csv.rb
|
297
368
|
- lib/carray/io/excel.rb
|
@@ -326,3 +397,4 @@ signing_key:
|
|
326
397
|
specification_version: 4
|
327
398
|
summary: Multi-dimesional array class
|
328
399
|
test_files: []
|
400
|
+
has_rdoc: true
|
data/ext/dataframe/API.txt
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
CADataFrame
|
2
|
-
|
3
|
-
#append(name) { INSTANCE_CONTEXT } <- any carray
|
4
|
-
#lead(name) { INSTANCE_CONTEXT } <- any carray
|
5
|
-
#execute { INSTANCE_CONTEXT } => any object
|
6
|
-
#select(name...) { INSTANCE_CONTEXT } <- boolean carray
|
7
|
-
#reorder { INSTANCE_CONTEXT } <- int32 carray (addresses for mapping)
|
8
|
-
#order_by { INSTANCE_CONTEXT } <- Array of int32 carray or carray (addresses for mapping)
|
9
|
-
|
10
|
-
#calculate {|label, column| CALLER_CONTEXT } <- scalar
|
11
|
-
#resample {|label, column| CALLER_CONTEXT } <- any carray
|
data/ext/dataframe/extconf.rb
DELETED
data/ext/dataframe/lib/carray/autoload/autoload_dataframe_dataframe.rb
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
|
2
|
-
module CA::TableMethods
|
3
|
-
autoload_method "to_dataframe", "carray/dataframe/dataframe"
|
4
|
-
end
|
5
|
-
|
6
|
-
autoload :CADataFrame, "carray/dataframe/dataframe"
|
7
|
-
|
8
|
-
autoload :DataFrame, "carray/dataframe/dataframe"
|
9
|
-
autoload :RSReceiver, "carray/dataframe/dataframe"
|
10
|
-
|
11
|
-
class RSRuby
|
12
|
-
autoload_method "setup", "carray/dataframe/dataframe"
|
13
|
-
autoload_method "recieve", "carray/dataframe/dataframe"
|
14
|
-
end
|
data/ext/dataframe/lib/carray/dataframe/dataframe.rb
DELETED
@@ -1,1104 +0,0 @@
|
|
1
|
-
require "carray"
|
2
|
-
require "carray/io/table"
|
3
|
-
|
4
|
-
module CA::TableMethods
|
5
|
-
|
6
|
-
def to_dataframe (&block)
|
7
|
-
return CADataFrame.new(self, &block)
|
8
|
-
end
|
9
|
-
|
10
|
-
end
|
11
|
-
|
12
|
-
class CADataFrame
|
13
|
-
|
14
|
-
def initialize (columns_or_table, row_index = nil, column_names = nil, &block)
|
15
|
-
case columns_or_table
|
16
|
-
when Hash
|
17
|
-
columns = columns_or_table
|
18
|
-
@column_names = columns.keys.map(&:to_s)
|
19
|
-
@columns = normalize_columns(columns)
|
20
|
-
@column_number = @column_names.size
|
21
|
-
@row_number = @columns.first[1].size
|
22
|
-
if @column_names.any?{ |key| @columns[key].size != @row_number }
|
23
|
-
raise "column sizes mismatch"
|
24
|
-
end
|
25
|
-
when CArray
|
26
|
-
table = columns_or_table
|
27
|
-
if column_names
|
28
|
-
@column_names = column_names.map(&:to_s)
|
29
|
-
else
|
30
|
-
@column_names = table.column_names.map(&:to_s)
|
31
|
-
end
|
32
|
-
@columns = table_to_columns(table)
|
33
|
-
@column_number = @column_names.size
|
34
|
-
@row_number = table.dim0
|
35
|
-
else
|
36
|
-
raise "unknown data"
|
37
|
-
end
|
38
|
-
if row_index
|
39
|
-
@row_index = row_index.to_ca.object
|
40
|
-
else
|
41
|
-
@row_index = nil
|
42
|
-
end
|
43
|
-
@__methods__ = {}
|
44
|
-
if block_given?
|
45
|
-
arrange(&block)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
def __methods__
|
50
|
-
return @__methods__
|
51
|
-
end
|
52
|
-
|
53
|
-
private
|
54
|
-
|
55
|
-
def table_to_columns (table)
|
56
|
-
new_columns = {}
|
57
|
-
@column_names.each_with_index do |name, i|
|
58
|
-
new_columns[name] = table[nil,i]
|
59
|
-
end
|
60
|
-
return new_columns
|
61
|
-
end
|
62
|
-
|
63
|
-
def normalize_columns (columns)
|
64
|
-
new_columns = {}
|
65
|
-
columns.each_key do |key|
|
66
|
-
case columns[key]
|
67
|
-
when CArray
|
68
|
-
column = columns[key]
|
69
|
-
when Array
|
70
|
-
column = columns[key].to_ca
|
71
|
-
if column.rank != 1
|
72
|
-
list = columns[key].clone
|
73
|
-
column = CArray.object(list.size).convert { list.shift }
|
74
|
-
end
|
75
|
-
else
|
76
|
-
column = columns[key].to_ca
|
77
|
-
end
|
78
|
-
new_columns[key.to_s] = column
|
79
|
-
end
|
80
|
-
return new_columns
|
81
|
-
end
|
82
|
-
|
83
|
-
public
|
84
|
-
|
85
|
-
attr_reader :columns, :column_names, :row_index, :column_number, :row_number
|
86
|
-
|
87
|
-
def column_types
|
88
|
-
return @columns_names.map{|name| @columns[name].data_type_name }
|
89
|
-
end
|
90
|
-
|
91
|
-
def each_column (&block)
|
92
|
-
return @columns.each(&block)
|
93
|
-
end
|
94
|
-
|
95
|
-
def each_row (with_row_index: false, &block)
|
96
|
-
if with_row_index and @row_index
|
97
|
-
@row_number.times do |i|
|
98
|
-
yield [@row_index[i]] + @columns.map{|n,c| c[i] }
|
99
|
-
end
|
100
|
-
else
|
101
|
-
@row_number.times do |i|
|
102
|
-
yield @columns.map{|n,c| c[i] }
|
103
|
-
end
|
104
|
-
end
|
105
|
-
return self
|
106
|
-
end
|
107
|
-
|
108
|
-
def method (hash)
|
109
|
-
new_hash = {}
|
110
|
-
hash.each do |key, value|
|
111
|
-
new_hash[key.to_s] = value.to_s
|
112
|
-
end
|
113
|
-
@__methods__.update(new_hash)
|
114
|
-
end
|
115
|
-
|
116
|
-
def col (name_or_index)
|
117
|
-
case name_or_index
|
118
|
-
when Integer
|
119
|
-
return @columns[@column_names[name_or_index]]
|
120
|
-
when String, Symbol
|
121
|
-
return @columns[name_or_index.to_s]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def template (*args, &block)
|
126
|
-
return @columns.first[1].template(*args, &block)
|
127
|
-
end
|
128
|
-
|
129
|
-
def row (idx)
|
130
|
-
if @row_index
|
131
|
-
addr = @row_index.search(idx)
|
132
|
-
return @column_names.map{|name| @columns[name][addr]}.to_ca
|
133
|
-
else
|
134
|
-
return @column_names.map{|name| @columns[name][idx]}.to_ca
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
def [] (row, col = nil)
|
139
|
-
if row.is_a?(Integer)
|
140
|
-
row = [row]
|
141
|
-
end
|
142
|
-
if col
|
143
|
-
if col.is_a?(Integer)
|
144
|
-
col = [col]
|
145
|
-
end
|
146
|
-
keys = @column_names.to_ca[col].to_a
|
147
|
-
values = @columns.values_at(*keys)
|
148
|
-
new_columns = {}
|
149
|
-
keys.each do |key|
|
150
|
-
new_columns[key] = @columns[key][row]
|
151
|
-
end
|
152
|
-
return CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
|
153
|
-
else
|
154
|
-
new_columns = {}
|
155
|
-
@column_names.each do |key|
|
156
|
-
new_columns[key] = @columns[key][row]
|
157
|
-
end
|
158
|
-
return CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
def fill (*names, value)
|
163
|
-
names.each do |name|
|
164
|
-
@columns[name.to_s].fill(value)
|
165
|
-
end
|
166
|
-
return self
|
167
|
-
end
|
168
|
-
|
169
|
-
def arrange (&block)
|
170
|
-
return Arranger.new(self).arrange(&block)
|
171
|
-
end
|
172
|
-
|
173
|
-
def rename (name1, name2)
|
174
|
-
if idx = @column_names.index(name1.to_s)
|
175
|
-
@column_names[idx] = name2.to_s
|
176
|
-
column = @columns[name1.to_s]
|
177
|
-
@columns.delete(name1.to_s)
|
178
|
-
@columns[name2.to_s] = column
|
179
|
-
else
|
180
|
-
raise "unknown column name #{name1}"
|
181
|
-
end
|
182
|
-
end
|
183
|
-
|
184
|
-
def downcase
|
185
|
-
new_column_names = []
|
186
|
-
new_columns = {}
|
187
|
-
@column_names.each do |name|
|
188
|
-
new_column_names << name.downcase
|
189
|
-
new_columns[name.downcase] = @columns[name]
|
190
|
-
end
|
191
|
-
@column_names = new_column_names
|
192
|
-
@columns = new_columns
|
193
|
-
return self
|
194
|
-
end
|
195
|
-
|
196
|
-
def append (name, new_column = nil, &block)
|
197
|
-
if new_column
|
198
|
-
# do nothing
|
199
|
-
elsif block
|
200
|
-
new_column = instance_exec(&block)
|
201
|
-
else
|
202
|
-
new_column = @columns.first[1].template(:object)
|
203
|
-
end
|
204
|
-
unless new_column.is_a?(CArray)
|
205
|
-
new_column = new_column.to_ca
|
206
|
-
end
|
207
|
-
new_columns = {}
|
208
|
-
@column_names.each do |key|
|
209
|
-
new_columns[key] = @columns[key]
|
210
|
-
end
|
211
|
-
new_columns[name.to_s] = new_column
|
212
|
-
return CADataFrame.new(new_columns, @row_index)
|
213
|
-
end
|
214
|
-
|
215
|
-
def lead (name, new_column = nil, &block)
|
216
|
-
if new_column
|
217
|
-
# do nothing
|
218
|
-
elsif block
|
219
|
-
new_column = instance_exec(&block)
|
220
|
-
else
|
221
|
-
new_column = @columns.first[1].template(:object)
|
222
|
-
end
|
223
|
-
unless new_column.is_a?(CArray)
|
224
|
-
new_column = new_column.to_ca
|
225
|
-
end
|
226
|
-
new_columns = {}
|
227
|
-
new_columns[name.to_s] = new_column
|
228
|
-
@column_names.each do |key|
|
229
|
-
new_columns[key] = @columns[key]
|
230
|
-
end
|
231
|
-
return CADataFrame.new(new_columns, @row_index)
|
232
|
-
end
|
233
|
-
|
234
|
-
def vacant_copy
|
235
|
-
new_columns = {}
|
236
|
-
@column_names.each do |key|
|
237
|
-
new_columns[key] = CArray.object(0)
|
238
|
-
end
|
239
|
-
return CADataFrame.new(new_columns)
|
240
|
-
end
|
241
|
-
|
242
|
-
def merge (*args)
|
243
|
-
return CADataFrame.merge(self, *args)
|
244
|
-
end
|
245
|
-
|
246
|
-
|
247
|
-
def execute (&block)
|
248
|
-
return instance_exec(&block)
|
249
|
-
end
|
250
|
-
|
251
|
-
def calculate (label, &block)
|
252
|
-
hash = {}
|
253
|
-
@column_names.each do |name|
|
254
|
-
begin
|
255
|
-
if block
|
256
|
-
hash[name] = [yield(name, @columns[name])]
|
257
|
-
else
|
258
|
-
hash[name] = [@columns[name].send(label.intern)]
|
259
|
-
end
|
260
|
-
rescue
|
261
|
-
hash[name] = [UNDEF]
|
262
|
-
end
|
263
|
-
end
|
264
|
-
return CADataFrame.new(hash, [label])
|
265
|
-
end
|
266
|
-
|
267
|
-
def resample (&block)
|
268
|
-
new_columns = {}
|
269
|
-
@column_names.each do |name|
|
270
|
-
begin
|
271
|
-
new_columns[name] = yield(name, @columns[name])
|
272
|
-
rescue
|
273
|
-
end
|
274
|
-
end
|
275
|
-
return CADataFrame.new(new_columns)
|
276
|
-
end
|
277
|
-
|
278
|
-
def select (*names, &block)
|
279
|
-
if names.empty?
|
280
|
-
names = @column_names
|
281
|
-
end
|
282
|
-
if block
|
283
|
-
row = instance_exec(&block)
|
284
|
-
else
|
285
|
-
row = nil
|
286
|
-
end
|
287
|
-
new_columns = {}
|
288
|
-
names.map(&:to_s).each do |name|
|
289
|
-
new_columns[name] = @columns[name][row]
|
290
|
-
end
|
291
|
-
return CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
|
292
|
-
end
|
293
|
-
|
294
|
-
def eliminate (*names)
|
295
|
-
if names.empty?
|
296
|
-
return self
|
297
|
-
end
|
298
|
-
names = names.map(&:to_s)
|
299
|
-
new_columns = {}
|
300
|
-
@column_names.each do |name|
|
301
|
-
unless names.include?(name)
|
302
|
-
new_columns[name] = @columns[name]
|
303
|
-
end
|
304
|
-
end
|
305
|
-
return CADataFrame.new(new_columns, @row_index)
|
306
|
-
end
|
307
|
-
|
308
|
-
def matchup (keyname, reference)
|
309
|
-
key = @columns[keyname.to_s]
|
310
|
-
idx = reference.matchup(key)
|
311
|
-
new_columns = {}
|
312
|
-
@column_names.each do |name|
|
313
|
-
if name == keyname
|
314
|
-
new_columns[name] = reference
|
315
|
-
else
|
316
|
-
new_columns[name] = @columns[name].project(idx)
|
317
|
-
end
|
318
|
-
end
|
319
|
-
if @row_index
|
320
|
-
new_row_index = @row_index.project(idx).unmask(nil)
|
321
|
-
else
|
322
|
-
new_row_index = nil
|
323
|
-
end
|
324
|
-
return CADataFrame.new(new_columns, new_row_index) {
|
325
|
-
self.send(keyname)[] = reference
|
326
|
-
}
|
327
|
-
end
|
328
|
-
|
329
|
-
def reorder (&block)
|
330
|
-
index = instance_exec(&block)
|
331
|
-
new_columns = {}
|
332
|
-
@column_names.each do |name|
|
333
|
-
new_columns[name] = @columns[name][index]
|
334
|
-
end
|
335
|
-
return CADataFrame.new(new_columns, @row_index ? @row_index[index] : nil)
|
336
|
-
end
|
337
|
-
|
338
|
-
def order_by (*names, &block)
|
339
|
-
if names.empty?
|
340
|
-
if block
|
341
|
-
ret = instance_exec(&block)
|
342
|
-
case ret
|
343
|
-
when CArray
|
344
|
-
list = [ret]
|
345
|
-
when Array
|
346
|
-
list = ret
|
347
|
-
end
|
348
|
-
end
|
349
|
-
else
|
350
|
-
list = @columns.values_at(*names.map{|s| s.to_s})
|
351
|
-
end
|
352
|
-
return reorder { CA.sort_addr(*list) }
|
353
|
-
end
|
354
|
-
|
355
|
-
def reverse
|
356
|
-
new_columns = {}
|
357
|
-
@column_names.each do |name|
|
358
|
-
new_columns[name] = @columns[name].reverse
|
359
|
-
end
|
360
|
-
return CADataFrame.new(new_columns, @row_index ? @row_index.reverse : nil)
|
361
|
-
end
|
362
|
-
|
363
|
-
def transpose (header = nil)
|
364
|
-
if header
|
365
|
-
column_names = header.map(&:to_s)
|
366
|
-
else
|
367
|
-
if @row_index
|
368
|
-
column_names = @row_index.convert(:object) {|v| v.to_s }
|
369
|
-
else
|
370
|
-
column_names = CArray.object(@row_number).seq("a",:succ)
|
371
|
-
end
|
372
|
-
end
|
373
|
-
return CADataFrame.new(ca.transpose, @column_names.to_ca, column_names)
|
374
|
-
end
|
375
|
-
|
376
|
-
def histogram (name, scale = nil, options = nil)
|
377
|
-
if scale.nil?
|
378
|
-
return group_by(name).table{ { :count => col(name).count_valid } }
|
379
|
-
else
|
380
|
-
if options
|
381
|
-
hist = CAHistogram.int(scale, options)
|
382
|
-
else
|
383
|
-
hist = CAHistogram.int(scale)
|
384
|
-
end
|
385
|
-
hist.increment(@columns[name.to_s])
|
386
|
-
hash = {
|
387
|
-
name.to_s => hist.midpoints[0],
|
388
|
-
"#{name}_L".to_s => scale[0..-2],
|
389
|
-
"#{name}_R".to_s => scale.shift(-1)[0..-2],
|
390
|
-
:count => hist[0..-2].to_ca,
|
391
|
-
}
|
392
|
-
return CADataFrame.new(hash)
|
393
|
-
end
|
394
|
-
end
|
395
|
-
|
396
|
-
def classify (name, scale = nil, opt = {})
|
397
|
-
if not scale
|
398
|
-
column = @columns[name.to_s]
|
399
|
-
mids = column.uniq
|
400
|
-
mapper = {}
|
401
|
-
mids.each_with_index do |v,i|
|
402
|
-
mapper[v] = i
|
403
|
-
end
|
404
|
-
cls = columns.convert(:int32) {|v| mapper[v] }
|
405
|
-
hash = {
|
406
|
-
"#{name}_M" => mids,
|
407
|
-
"#{name}_L" => mids,
|
408
|
-
"#{name}_R" => mids,
|
409
|
-
"#{name}_CLASS" => cls
|
410
|
-
}
|
411
|
-
else
|
412
|
-
option = {
|
413
|
-
:include_upper => false,
|
414
|
-
:include_lowest => true,
|
415
|
-
:offset => 0,
|
416
|
-
}.update(opt)
|
417
|
-
column = @columns[name.to_s]
|
418
|
-
cls = scale.bin(column,
|
419
|
-
option[:include_upper],
|
420
|
-
option[:include_lowest],
|
421
|
-
option[:offset])
|
422
|
-
mids = ((scale + scale.shifted(-1))/2)[0..-2].to_ca
|
423
|
-
left = scale[0..-2]
|
424
|
-
right = scale.shift(-1)[0..-2]
|
425
|
-
hash = {
|
426
|
-
"#{name}_M" => mids.project(cls).to_ca,
|
427
|
-
"#{name}_L" => left.project(cls).to_ca,
|
428
|
-
"#{name}_R" => right.project(cls).to_ca,
|
429
|
-
"#{name}_CLASS" => cls
|
430
|
-
}
|
431
|
-
end
|
432
|
-
return CADataFrame.new(hash)
|
433
|
-
end
|
434
|
-
|
435
|
-
def suffix (suf)
|
436
|
-
new_columns = {}
|
437
|
-
@column_names.each do |name|
|
438
|
-
new_name = (name.to_s + suf).to_s
|
439
|
-
new_columns[new_name] = @columns[name]
|
440
|
-
end
|
441
|
-
return CADataFrame.new(new_columns, @row_index)
|
442
|
-
end
|
443
|
-
|
444
|
-
def ca (*names)
|
445
|
-
if names.empty?
|
446
|
-
return CADFArray.new(@column_names, @columns)
|
447
|
-
else
|
448
|
-
return CADFArray.new(names.map(&:to_s), @columns)
|
449
|
-
end
|
450
|
-
end
|
451
|
-
|
452
|
-
def to_ca (*names)
|
453
|
-
return ca(*names).to_ca
|
454
|
-
end
|
455
|
-
|
456
|
-
def to_hash (name1, name2)
|
457
|
-
return CArray.join([@columns[name1.to_s], @columns[name2.to_s]]).to_a.to_h
|
458
|
-
end
|
459
|
-
|
460
|
-
def ascii_table (rowmax = :full)
|
461
|
-
if @row_index
|
462
|
-
namelist = [""] + @column_names
|
463
|
-
tbl = CADFArray.new(namelist, @columns.clone.update("" => @row_index))
|
464
|
-
else
|
465
|
-
namelist = @column_names
|
466
|
-
tbl = to_ca
|
467
|
-
end
|
468
|
-
if rowmax.is_a?(Integer) and @row_number > rowmax
|
469
|
-
list = tbl[0..(rowmax/2),nil].to_a
|
470
|
-
list.push namelist.map { "..." }
|
471
|
-
list.push *(tbl[-rowmax/2+1..-1,nil].to_a)
|
472
|
-
tbl = list.to_ca
|
473
|
-
end
|
474
|
-
datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
|
475
|
-
datamb = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
|
476
|
-
namemb = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
|
477
|
-
mb = datamb.or(namemb)
|
478
|
-
namelen = namelist.map(&:length).to_ca
|
479
|
-
datalen = datastr.convert(&:length)
|
480
|
-
if mb.max == 0
|
481
|
-
if datalen.size == 0
|
482
|
-
lengths = namelen.to_a
|
483
|
-
else
|
484
|
-
lengths = datalen.max(0).pmax(namelen).to_a
|
485
|
-
end
|
486
|
-
hrule = "-" + lengths.map {|len| "-"*len}.join("--") + "-"
|
487
|
-
header = " " +
|
488
|
-
[namelist, lengths].transpose.map{|name, len|
|
489
|
-
"#{name.to_s.ljust(len)}" }.join(" ") + " "
|
490
|
-
ary = [hrule, header, hrule]
|
491
|
-
if datalen.size > 0
|
492
|
-
datastr[:i,nil].each_with_index do |blk, i|
|
493
|
-
list = blk.flatten.to_a
|
494
|
-
ary << " " + [list, lengths].transpose.map{|value, len|
|
495
|
-
"#{value.ljust(len)}"}.join(" ") + " "
|
496
|
-
end
|
497
|
-
end
|
498
|
-
ary << hrule
|
499
|
-
return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
|
500
|
-
else
|
501
|
-
namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
|
502
|
-
if datalen.size == 0
|
503
|
-
maxwidth = namewidth
|
504
|
-
else
|
505
|
-
datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
|
506
|
-
maxwidth = datawidth.max(0).pmax(namewidth)
|
507
|
-
end
|
508
|
-
len = maxwidth[:*,nil] - datawidth + datalen
|
509
|
-
hrule = "-" + maxwidth.map {|len| "-"*len}.join("--") + "-"
|
510
|
-
header = " " +
|
511
|
-
[namelist, maxwidth.to_a].transpose.map{|name, len|
|
512
|
-
"#{name.to_s.ljust(len-__strwidth__(name.to_s)+name.to_s.length)}" }.join(" ") + " "
|
513
|
-
ary = [hrule, header, hrule]
|
514
|
-
if datalen.size > 0
|
515
|
-
datastr[:i,nil].each_with_addr do |blk, i|
|
516
|
-
list = blk.flatten.to_a
|
517
|
-
ary << " " + list.map.with_index {|value, j|
|
518
|
-
"#{value.ljust(len[i,j])}"}.join(" ") + " "
|
519
|
-
end
|
520
|
-
end
|
521
|
-
ary << hrule
|
522
|
-
return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
|
523
|
-
end
|
524
|
-
end
|
525
|
-
|
526
|
-
def __obj_to_string__ (obj)
|
527
|
-
case obj
|
528
|
-
when Float
|
529
|
-
"%.6g" % obj
|
530
|
-
else
|
531
|
-
obj.to_s
|
532
|
-
end
|
533
|
-
end
|
534
|
-
|
535
|
-
def __strwidth__ (string)
|
536
|
-
if string.ascii_only?
|
537
|
-
return string.length
|
538
|
-
else
|
539
|
-
return string.each_char.inject(0){|s,c| s += c.bytesize > 1 ? 2 : 1 }
|
540
|
-
end
|
541
|
-
end
|
542
|
-
|
543
|
-
def inspect
|
544
|
-
return ascii_table(10)
|
545
|
-
end
|
546
|
-
|
547
|
-
def to_s
|
548
|
-
return ascii_table
|
549
|
-
end
|
550
|
-
|
551
|
-
def to_ary
|
552
|
-
return [to_s]
|
553
|
-
end
|
554
|
-
|
555
|
-
def to_csv (with_row_index: true)
|
556
|
-
if @row_index and with_row_index
|
557
|
-
namelist = [""] + @column_names
|
558
|
-
tbl = CADFArray.new(namelist, @columns.clone.update("" => @row_index))
|
559
|
-
else
|
560
|
-
namelist = @column_names
|
561
|
-
tbl = ca
|
562
|
-
end
|
563
|
-
output = []
|
564
|
-
output << namelist.map(&:to_s).join(",")
|
565
|
-
output << tbl.to_csv
|
566
|
-
return output.join("\n")
|
567
|
-
end
|
568
|
-
|
569
|
-
def to_xlsx (filename, sheet_name: 'Sheet1', with_row_index: true, &block)
|
570
|
-
require "axlsx"
|
571
|
-
xl = Axlsx::Package.new
|
572
|
-
xl.use_shared_strings = true
|
573
|
-
sheet = xl.workbook.add_worksheet(name: sheet_name)
|
574
|
-
sheet.add_row(column_names)
|
575
|
-
each_row(with_row_index: with_row_index) do |list|
|
576
|
-
sheet.add_row(list)
|
577
|
-
end
|
578
|
-
if block_given?
|
579
|
-
yield sheet
|
580
|
-
end
|
581
|
-
xl.serialize(filename)
|
582
|
-
end
|
583
|
-
|
584
|
-
def method_missing (name, *args)
|
585
|
-
if args.size == 0
|
586
|
-
name = name.to_s
|
587
|
-
if @column_names.include?(name)
|
588
|
-
return @columns[name]
|
589
|
-
elsif @column_names.include?(name.gsub(/_/,'.')) ### For R
|
590
|
-
return @columns[name.gsub(/_/,'.')]
|
591
|
-
elsif @__methods__.include?(name)
|
592
|
-
return @columns[@__methods__[name]]
|
593
|
-
end
|
594
|
-
end
|
595
|
-
super
|
596
|
-
end
|
597
|
-
|
598
|
-
end
|
599
|
-
|
600
|
-
#############################################################
|
601
|
-
#
|
602
|
-
# ARRANGER
|
603
|
-
#
|
604
|
-
#############################################################
|
605
|
-
|
606
|
-
|
607
|
-
class CADataFrame
|
608
|
-
|
609
|
-
class Arranger
|
610
|
-
|
611
|
-
def initialize (dataframe)
|
612
|
-
@dataframe = dataframe
|
613
|
-
end
|
614
|
-
|
615
|
-
def arrange (&block)
|
616
|
-
instance_exec(&block)
|
617
|
-
return @dataframe
|
618
|
-
end
|
619
|
-
|
620
|
-
private
|
621
|
-
|
622
|
-
def column_names
|
623
|
-
return @dataframe.column_names
|
624
|
-
end
|
625
|
-
|
626
|
-
def row_number
|
627
|
-
return @dataframe.row_number
|
628
|
-
end
|
629
|
-
|
630
|
-
def method (hash)
|
631
|
-
@dataframe.method(hash)
|
632
|
-
end
|
633
|
-
|
634
|
-
def timeseries (name, fmt = "%Y-%m-%d %H:%M:%S")
|
635
|
-
@dataframe.columns[name.to_s] = @dataframe.columns[name.to_s].strptime(fmt)
|
636
|
-
end
|
637
|
-
|
638
|
-
def type (type, name, mask = :novalue)
|
639
|
-
@dataframe.columns[name.to_s] = @dataframe.columns[name.to_s].to_type(type)
|
640
|
-
if mask != :novalue
|
641
|
-
@dataframe.columns[name.to_s].maskout!(options[:maskout])
|
642
|
-
end
|
643
|
-
end
|
644
|
-
|
645
|
-
def eliminate (*names)
|
646
|
-
if names.empty?
|
647
|
-
return self
|
648
|
-
end
|
649
|
-
names = names.map(&:to_s)
|
650
|
-
@dataframe.column_names.each do |name|
|
651
|
-
if names.include?(name)
|
652
|
-
@dataframe.columns.delete(name)
|
653
|
-
@dataframe.column_names.delete(name)
|
654
|
-
end
|
655
|
-
end
|
656
|
-
end
|
657
|
-
|
658
|
-
def template (*args, &block)
|
659
|
-
return @dataframe.template(*args, &block)
|
660
|
-
end
|
661
|
-
|
662
|
-
def double (*names)
|
663
|
-
names.flatten.each do |name|
|
664
|
-
type(:double, name)
|
665
|
-
end
|
666
|
-
end
|
667
|
-
|
668
|
-
def int (*names)
|
669
|
-
names.flatten.each do |name|
|
670
|
-
type(:int, name)
|
671
|
-
end
|
672
|
-
end
|
673
|
-
|
674
|
-
def maskout (value, *names)
|
675
|
-
names.flatten.each do |name|
|
676
|
-
@dataframe.columns[name.to_s].maskout!(value)
|
677
|
-
end
|
678
|
-
end
|
679
|
-
|
680
|
-
def unmask (value, *names)
|
681
|
-
names.flatten.each do |name|
|
682
|
-
@dataframe.columns[name.to_s].unmask(value)
|
683
|
-
end
|
684
|
-
end
|
685
|
-
|
686
|
-
def col (name)
|
687
|
-
return @dataframe.col(name)
|
688
|
-
end
|
689
|
-
|
690
|
-
# Adds a column at the end of the wrapped dataframe.
#
# name::       column name (stored as a string)
# new_column:: the column data. When omitted (or nil/false) an :object
#              template of the dataframe's first column is used instead.
#              Anything that is not a CArray is converted with to_ca.
#
# Returns the column_names list after the push.
def append (name, new_column = nil)
  # The fallback was already handled by the body; the default just makes the
  # argument optional for callers.
  new_column ||= @dataframe.columns.first[1].template(:object)
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  key = name.to_s
  @dataframe.columns[key] = new_column
  @dataframe.column_names.push(key)
end
|
702
|
-
|
703
|
-
# Adds a column at the front of the wrapped dataframe.
#
# name::       column name (stored as a string)
# new_column:: the column data. When omitted (or nil/false) an :object
#              template of the dataframe's first column is used instead.
#              Anything that is not a CArray is converted with to_ca.
#
# Returns the column_names list after the unshift.
def lead (name, new_column = nil)
  # The fallback was already handled by the body; the default just makes the
  # argument optional for callers.
  new_column ||= @dataframe.columns.first[1].template(:object)
  new_column = new_column.to_ca unless new_column.is_a?(CArray)
  key = name.to_s
  @dataframe.columns[key] = new_column
  @dataframe.column_names.unshift(key)
end
|
715
|
-
|
716
|
-
# Renames column +name1+ to +name2+, keeping its position in column_names.
# Both names are converted with to_s.
#
# Raises a RuntimeError when +name1+ is not listed in column_names.
# Returns the renamed column.
def rename (name1, name2)
  old_key = name1.to_s
  position = @dataframe.column_names.index(old_key)
  raise "unknown column name #{name1}" unless position
  new_key = name2.to_s
  @dataframe.column_names[position] = new_key
  moved = @dataframe.columns[old_key]
  @dataframe.columns.delete(old_key)
  @dataframe.columns[new_key] = moved
end
|
726
|
-
|
727
|
-
# Delegates to +downcase+ on the wrapped dataframe and returns its result.
def downcase
  return @dataframe.downcase
end
|
730
|
-
|
731
|
-
# Delegates to +classify+ on the wrapped dataframe with the same three
# arguments and returns its result.
def classify (name, scale, opt = {})
  @dataframe.classify(name, scale, opt)
end
|
734
|
-
|
735
|
-
# Translates the values of a column through +mapper+.
#
# mapper::         a Hash (each value is looked up in it and the result is an
#                  :object column), or a CArray / Array (the column is
#                  projected through it with +project+)
# name_or_column:: a column name (String/Symbol, looked up in the wrapped
#                  dataframe), a CArray, or an Array (converted with to_ca)
#
# Raises a RuntimeError ("invalid argument") for any other +name_or_column+.
# Returns the mapped column, or nil when +mapper+ is none of the supported
# kinds.
def map (mapper, name_or_column)
  column =
    case name_or_column
    when String, Symbol
      @dataframe.columns[name_or_column.to_s]
    when CArray
      name_or_column
    when Array
      name_or_column.to_ca
    else
      raise "invalid argument"
    end
  case mapper
  when Hash
    # Look values up in the mapper itself; a bare +hash+ here would be
    # Object#hash, i.e. an Integer indexed by bit position.
    column.convert(:object) { |v| mapper[v] }
  when CArray
    mapper.project(column)
  when Array
    mapper.to_ca.project(column)
  end
end
|
756
|
-
|
757
|
-
# Lets columns be read as if they were methods. For a call without
# arguments:
#   * a name listed in the dataframe's column_names returns that column;
#   * a name registered in the dataframe's __methods__ table returns the
#     column that table points to.
# Every other call goes to the default handling via +super+.
def method_missing (name, *args)
  if args.empty?
    key = name.to_s
    return @dataframe.columns[key] if @dataframe.column_names.include?(key)
    if @dataframe.__methods__.include?(key)
      return @dataframe.columns[@dataframe.__methods__[key]]
    end
  end
  super
end
|
767
|
-
|
768
|
-
end
|
769
|
-
|
770
|
-
end
|
771
|
-
|
772
|
-
#############################################################
|
773
|
-
#
|
774
|
-
# Class methods
|
775
|
-
#
|
776
|
-
#############################################################
|
777
|
-
|
778
|
-
class CADataFrame

  # Loads a table with CArray.load_sqlite3, converts it with to_dataframe and
  # masks out nil in every column.
  def self.load_sqlite3 (*args)
    table = CArray.load_sqlite3(*args)
    return table.to_dataframe.arrange{ maskout nil, *column_names }
  end

  # Writes this dataframe through a CArray copy of +ca+ that has been given
  # CA::TableMethods and this dataframe's column names; +args+ are passed on
  # to that array's to_sqlite3.
  def to_sqlite3 (*args)
    table = self.ca.to_ca
    table.extend CA::TableMethods
    table.column_names = column_names
    table.to_sqlite3(*args)
  end

  # Loads a table with CArray.load_csv (arguments and block passed through),
  # converts it with to_dataframe and masks out nil in every column.
  def self.load_csv (*args, &block)
    table = CArray.load_csv(*args, &block)
    return table.to_dataframe.arrange{ maskout nil, *column_names }
  end

  # Parses a table with CArray.from_csv (arguments and block passed through),
  # converts it with to_dataframe and masks out nil in every column.
  def self.from_csv (*args, &block)
    table = CArray.from_csv(*args, &block)
    return table.to_dataframe.arrange{ maskout nil, *column_names }
  end

  # Builds one dataframe holding the columns of all given dataframes.
  # When a column name occurs more than once the last table wins. The row
  # index is taken from the first argument.
  def self.merge (*args)
    ref = args.first
    merged = {}
    args.each do |table|
      table.column_names.each { |name| merged[name] = table.col(name) }
    end
    return CADataFrame.new(merged, ref.row_index)
  end

  # Stacks the given dataframes on top of each other. The columns (and each
  # column's data type) are taken from the first argument; every column is
  # bound along axis 0 with CArray.bind. The row indexes are joined only when
  # every argument has one; otherwise the result has no row index.
  def self.concat (*args)
    ref = args.first
    new_columns = {}
    ref.column_names.each do |name|
      parts = args.map{|t| t.col(name) }
      new_columns[name] = CArray.bind(parts.first.data_type, parts, 0)
    end
    row_indexes = args.map(&:row_index)
    new_row_index = row_indexes.all? ? CArray.join(*row_indexes) : nil
    return CADataFrame.new(new_columns, new_row_index)
  end

end
|
829
|
-
|
830
|
-
#############################################################
|
831
|
-
#
|
832
|
-
# CADFArray
|
833
|
-
#
|
834
|
-
#############################################################
|
835
|
-
|
836
|
-
class CADFArray < CAObject # :nodoc:

  attr_reader :column_names

  # Presents a set of named columns as one read-only :object array with one
  # row per element of the first column and one column per name.
  #
  # column_names:: ordered list of names
  # columns::      lookup from name to column
  def initialize (column_names, columns)
    @column_names = column_names
    @columns = columns
    n_rows = @columns[@column_names.first].size
    n_cols = @column_names.size
    extend CA::TableMethods
    super(:object, [n_rows, n_cols], :read_only=>true)
    __create_mask__
  end

  # Returns the element at idx = [row, column position].
  def fetch_index (idx)
    row, pos = *idx
    return @columns[@column_names[pos]][row]
  end

  # Fills +data+ column by column with each column's +value+.
  def copy_data (data)
    @column_names.each_with_index do |name, pos|
      data[nil,pos] = @columns[name].value
    end
  end

  # Left empty in the original.
  # NOTE(review): presumably a hook required by CAObject - confirm.
  def create_mask
  end

  # Returns the mask element at idx = [row, column position], or 0 when the
  # addressed column has no mask.
  def mask_fetch_index (idx)
    row, pos = *idx
    column = @columns[@column_names[pos]]
    return column.has_mask? ? column.mask[row] : 0
  end

  # Fills +data+ column by column with the mask of each column that has one;
  # positions of unmasked columns are left untouched.
  def mask_copy_data (data)
    @column_names.each_with_index do |name, pos|
      next unless @columns[name].has_mask?
      data[nil,pos] = @columns[name].mask
    end
  end

end
|
883
|
-
|
884
|
-
|
885
|
-
#############################################################
|
886
|
-
#
|
887
|
-
# GROUPING
|
888
|
-
#
|
889
|
-
#############################################################
|
890
|
-
|
891
|
-
class CADataFrame

  # Groups the rows by one or several columns.
  # Exactly one name gives a CADataFrameGroup; any other number of names
  # gives a CADataFrameGroupMulti.
  def group_by (*names)
    return CADataFrameGroupMulti.new(self, *names) unless names.size == 1
    return CADataFrameGroup.new(self, names.first)
  end

end
|
902
|
-
|
903
|
-
class CADataFrameGroup

  # dataframe:: the dataframe whose rows are grouped
  # name::      the grouping column, or a one-entry Hash {column => keys}
  #             to fix the group keys explicitly; without a Hash the keys
  #             are the sorted unique values of the column
  #
  # For every key the addresses of the matching rows are precomputed.
  def initialize (dataframe, name)
    @dataframe = dataframe
    explicit = Hash === name
    name, list = name.first if explicit
    @column = @dataframe.col(name)
    @keys = explicit ? list.to_ca : @column.uniq.sort
    @name = name.to_s
    @addrs = {}
    @keys.each { |k| @addrs[k] = @column.eq(k).where }
  end

  # Runs +execute+ with the block on the rows of each group and collects the
  # resulting hashes into a new dataframe: one row per key, the grouping
  # column first, then one column per hash key.
  def table (&block)
    results = []
    @keys.each { |k| results << @dataframe[@addrs[k]].execute(&block) }
    columns = {@name=>@keys}
    results.each_with_index do |result, row|
      result.each do |key, value|
        (columns[key] ||= [])[row] = value
      end
    end
    return CADataFrame.new(columns)
  end

  # Reduces every column other than the grouping column to one value per
  # group: with a block, the block receives (column name, group values);
  # without one, the method named +label+ is sent to the group values.
  # A group whose computation raises keeps the initial UNDEF entry.
  def calculate (label, &block)
    new_columns = {@name=>@keys}
    @dataframe.each_column do |name, column|
      next if name == @name
      new_columns[name] = CArray.object(@keys.size) { UNDEF }
      @keys.each_with_index do |k, i|
        begin
          new_columns[name][i] =
            block ? yield(name, column[@addrs[k]]) : column[@addrs[k]].send(label.intern)
        rescue
          # best effort: leave the slot as it was initialised
        end
      end
    end
    return CADataFrame.new(new_columns)
  end

  # Returns the rows of the group +group_value+, or a vacant copy of the
  # dataframe when there is no such group.
  def [] (group_value)
    addr = @addrs[group_value]
    return addr ? @dataframe[addr] : @dataframe.vacant_copy
  end

end
|
969
|
-
|
970
|
-
class CADataFrameGroupMulti

  # dataframe:: the dataframe whose rows are grouped
  # names::     grouping columns; each may be a column name or a one-entry
  #             Hash {column => keys} fixing that column's keys. Without a
  #             Hash the keys are the sorted unique values of the column.
  #
  # For every combination of keys the addresses of the matching rows are
  # precomputed.
  def initialize (dataframe, *names)
    @rank = names.size
    @dataframe = dataframe
    @names = []
    @column = []
    @keys = []
    names.each_with_index do |name, i|
      explicit = Hash === name
      name, list = name.first if explicit
      @column[i] = @dataframe.col(name)
      @keys[i] = explicit ? list.to_ca : @column[i].to_ca.uniq.sort
      @names[i] = name
    end
    @addrs = {}
    each_with_keys do |list|
      flag = (1...@rank).inject(@column[0].eq(list[0])) do |acc, i|
        acc & @column[i].eq(list[i])
      end
      @addrs[list] = flag.where
    end
  end

  # Iterates over every combination of keys (one key per grouping column),
  # each combination given as an Array.
  def each_with_keys (&block)
    first, *rest = @keys.map(&:to_a)
    first.product(*rest).each(&block)
  end

  # Runs +execute+ with the block on the rows of each key combination and
  # collects the resulting hashes into a new dataframe: one row per
  # combination, the grouping columns first, then one column per hash key.
  def table (&block)
    results = each_with_keys.map { |list| @dataframe[@addrs[list]].execute(&block) }
    columns = {}
    @names.each { |name| columns[name] = [] }
    each_with_keys.with_index do |list, row|
      @names.each_with_index { |name, i| columns[name][row] = list[i] }
    end
    results.each_with_index do |result, row|
      result.each do |key, value|
        (columns[key] ||= [])[row] = value
      end
    end
    return CADataFrame.new(columns)
  end

  # Returns the rows of the key combination +group_value+ (an Array), or a
  # vacant copy of the dataframe when there is no such combination.
  def [] (group_value)
    addr = @addrs[group_value]
    return addr ? @dataframe[addr] : @dataframe.vacant_copy
  end

  # Yields each key combination together with its rows.
  def each
    each_with_keys { |key| yield key, @dataframe[@addrs[key]] }
  end

end
|
1042
|
-
|
1043
|
-
|
1044
|
-
#############################################################
|
1045
|
-
#
|
1046
|
-
# PIVOT TABLE
|
1047
|
-
#
|
1048
|
-
#############################################################
|
1049
|
-
|
1050
|
-
class CADataFrame

  # Builds a CADataFramePivot of this dataframe over the two given columns
  # (each either a name or a one-entry Hash, as accepted by the pivot class).
  def pivot (name1, name2)
    CADataFramePivot.new(self, name1, name2)
  end

end
|
1057
|
-
|
1058
|
-
class CADataFramePivot

  # dataframe:: the dataframe to pivot
  # name1::     column supplying the row keys, or a one-entry Hash
  #             {column => keys}; an explicit list is converted with to_ca
  # name2::     column supplying the column keys, or a one-entry Hash
  #             {column => keys}; an explicit list is used as given
  #
  # Without a Hash the keys are the sorted unique values of the column.
  # For every (row key, column key) pair the addresses of the matching rows
  # are precomputed.
  def initialize (dataframe, name1, name2)
    @dataframe = dataframe

    explicit1 = Hash === name1
    name1, list1 = name1.first if explicit1
    @column1 = @dataframe.col(name1)
    @keys1 = explicit1 ? list1.to_ca : @column1.uniq.sort

    explicit2 = Hash === name2
    name2, list2 = name2.first if explicit2
    @column2 = @dataframe.col(name2)
    @keys2 = explicit2 ? list2 : @column2.uniq.sort

    @addrs = {}
    @keys1.each do |k1|
      @keys2.each do |k2|
        hits = @column1.eq(k1) & @column2.eq(k2)
        @addrs[[k1,k2]] = hits.where
      end
    end
  end

  # Builds the pivot table: one column per key of the second axis, one row
  # per key of the first axis (which also serves as the row index). Each cell
  # holds the result of +execute+ with the block on the rows matching that
  # key pair.
  def table (&block)
    columns = {}
    @keys2.each { |k2| columns[k2] = CArray.object(@keys1.size) { UNDEF } }
    @keys1.each_with_index do |k1, i|
      @keys2.each do |k2|
        cell_rows = @dataframe[@addrs[[k1,k2]]]
        columns[k2][i] = cell_rows.execute(&block)
      end
    end
    return CADataFrame.new(columns, @keys1)
  end

end
|
1102
|
-
|
1103
|
-
|
1104
|
-
|