carray 1.1.7 → 1.1.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -414,6 +414,9 @@ module CA
414
414
  @table = CArray.merge(CA_OBJECT, table[nil].fields)
415
415
  else
416
416
  @names = table.instance_exec{ @names }
417
+ if @names.nil?
418
+ @names = table.instance_exec{ @column_names }
419
+ end
417
420
  case
418
421
  when table.rank > 2
419
422
  @table = table.reshape(false,nil).object
@@ -451,7 +454,7 @@ module CA
451
454
  end
452
455
 
453
456
  # puts header
454
- def header (list=@names)
457
+ def header (list = @names)
455
458
  @io.write list.map{|s| csv_quote(s)}.join(@sep)
456
459
  @io.write(@rs)
457
460
  end
@@ -464,25 +467,29 @@ module CA
464
467
 
465
468
  # write value
466
469
  # If option :strict is set, do csv_quote for string element
467
- def body (opt = {:strict=>true})
468
- if opt[:strict]
470
+ def body (strict: true, format: nil)
471
+ if strict
469
472
  case @table.data_type
470
473
  when CA_OBJECT
471
- table = @table
474
+ table = @table.to_ca
472
475
  table[:is_kind_of, String].map! { |s| csv_quote(s) }
473
476
  when CA_FIXLEN
474
477
  table = @table.object
475
478
  table.map! { |s| csv_quote(s) }
476
479
  else
477
- table = @table.object
480
+ table = @table.object
478
481
  end
482
+ else
483
+ table = @table
484
+ end
485
+ if format
479
486
  table.dim0.times do |i|
480
- @io.write table[i,nil].to_a.join(@sep)
487
+ @io.write Kernel::format(format,*table[i,nil].to_a)
481
488
  @io.write(@rs)
482
- end
489
+ end
483
490
  else
484
- @table.dim0.times do |i|
485
- @io.write @table[i,nil].to_a.join(@sep)
491
+ table.dim0.times do |i|
492
+ @io.write table[i,nil].to_a.join(@sep)
486
493
  @io.write(@rs)
487
494
  end
488
495
  end
@@ -68,8 +68,13 @@ class CArray
68
68
  insert.execute [self[i]]
69
69
  end
70
70
  else
71
- dim0.times do |i|
72
- insert.execute self[i,nil].to_a
71
+ begin
72
+ dim0.times do |i|
73
+ insert.execute self[i,nil].to_a
74
+ end
75
+ rescue
76
+ puts self[i,nil].to_a
77
+ raise $!
73
78
  end
74
79
  end
75
80
  database.commit if transaction
@@ -155,7 +155,7 @@ class TestCArrayRefStore < Test::Unit::TestCase
155
155
  a = CArray.int(3,3).seq!
156
156
  assert_equal(CA_INT([4]), a[[4]])
157
157
  assert_equal(CA_INT([0,1,2,3]), a[[0..3]])
158
- assert_equal(CABlock, a[nil].class)
158
+ assert_equal(CARefer, a[nil].class)
159
159
  assert_equal(CA_INT([0,1,2,3,4,5,6,7,8]), a[nil])
160
160
  assert_raise(IndexError) { a[0..9] }
161
161
  assert_raise(IndexError) { a[-10..-1] }
data/version.h CHANGED
@@ -10,9 +10,9 @@
10
10
 
11
11
  ---------------------------------------------------------------------------- */
12
12
 
13
- #define CA_VERSION "1.1.7"
14
- #define CA_VERSION_CODE 117
13
+ #define CA_VERSION "1.1.8"
14
+ #define CA_VERSION_CODE 118
15
15
  #define CA_VERSION_MAJOR 1
16
16
  #define CA_VERSION_MINOR 1
17
- #define CA_VERSION_TEENY 7
18
- #define CA_VERSION_DATE "2016/05/06"
17
+ #define CA_VERSION_TEENY 8
18
+ #define CA_VERSION_DATE "2016/05/10"
metadata CHANGED
@@ -1,15 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: carray
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hiroki Motoyoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-05-06 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2016-08-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.6.1.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.6.1.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: narray_miss
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.3'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.3'
41
+ - !ruby/object:Gem::Dependency
42
+ name: axlsx
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '2.0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '2.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: spreadsheet
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.1'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.1'
69
+ - !ruby/object:Gem::Dependency
70
+ name: sqlite3
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.3'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.3'
13
83
  description: |2
14
84
  CArray is a uniform multi-dimensional rectangular array class.
15
85
  It provides the various types of sub-arrays and references
@@ -20,7 +90,6 @@ executables: []
20
90
  extensions:
21
91
  - extconf.rb
22
92
  - ext/calculus/extconf.rb
23
- - ext/dataframe/extconf.rb
24
93
  - ext/fortio/extconf.rb
25
94
  - ext/imagemap/extconf.rb
26
95
  - ext/mathfunc/extconf.rb
@@ -84,17 +153,15 @@ files:
84
153
  - carray_test.c
85
154
  - carray_undef.c
86
155
  - carray_utils.c
156
+ - devel/guess_shape.rb
157
+ - examples/ex001.rb
158
+ - examples/test-int.rb
87
159
  - ext/calculus/carray_calculus.c
88
160
  - ext/calculus/carray_interp.c
89
161
  - ext/calculus/extconf.rb
90
162
  - ext/calculus/lib/autoload/autoload_math_calculus.rb
91
163
  - ext/calculus/lib/math/calculus.rb
92
164
  - ext/calculus/lib/math/interp/adapter_interp1d.rb
93
- - ext/dataframe/API.txt
94
- - ext/dataframe/extconf.rb
95
- - ext/dataframe/lib/carray/autoload/autoload_dataframe_dataframe.rb
96
- - ext/dataframe/lib/carray/dataframe/dataframe.rb
97
- - ext/dataframe/sample/test_uniq_sort.rb
98
165
  - ext/fortio/extconf.rb
99
166
  - ext/fortio/lib/carray/autoload/autoload_fortran_format.rb
100
167
  - ext/fortio/lib/carray/io/fortran_format.rb
@@ -152,6 +219,7 @@ files:
152
219
  - lib/carray.rb
153
220
  - lib/carray/autoload/autoload_base.rb
154
221
  - lib/carray/autoload/autoload_graphics_gnuplot.rb
222
+ - lib/carray/autoload/autoload_graphics_zimg.rb
155
223
  - lib/carray/autoload/autoload_io_csv.rb
156
224
  - lib/carray/autoload/autoload_io_excel.rb
157
225
  - lib/carray/autoload/autoload_io_imagemagick.rb
@@ -173,6 +241,7 @@ files:
173
241
  - lib/carray/base/serialize.rb
174
242
  - lib/carray/base/struct.rb
175
243
  - lib/carray/graphics/gnuplot.rb
244
+ - lib/carray/graphics/zimg.rb
176
245
  - lib/carray/info.rb
177
246
  - lib/carray/io/csv.rb
178
247
  - lib/carray/io/excel.rb
@@ -264,13 +333,14 @@ licenses: []
264
333
  metadata: {}
265
334
  post_install_message:
266
335
  rdoc_options:
267
- - "--main=rdoc_main.rb"
336
+ - "--main rdoc_main.rb"
268
337
  - rdoc_main.rb
269
338
  - rdoc_ext.rb
270
339
  - rdoc_math.rb
271
340
  - rdoc_stat.rb
272
341
  - lib/carray/autoload/autoload_base.rb
273
342
  - lib/carray/autoload/autoload_graphics_gnuplot.rb
343
+ - lib/carray/autoload/autoload_graphics_zimg.rb
274
344
  - lib/carray/autoload/autoload_io_csv.rb
275
345
  - lib/carray/autoload/autoload_io_excel.rb
276
346
  - lib/carray/autoload/autoload_io_imagemagick.rb
@@ -292,6 +362,7 @@ rdoc_options:
292
362
  - lib/carray/base/serialize.rb
293
363
  - lib/carray/base/struct.rb
294
364
  - lib/carray/graphics/gnuplot.rb
365
+ - lib/carray/graphics/zimg.rb
295
366
  - lib/carray/info.rb
296
367
  - lib/carray/io/csv.rb
297
368
  - lib/carray/io/excel.rb
@@ -326,3 +397,4 @@ signing_key:
326
397
  specification_version: 4
327
398
  summary: Multi-dimesional array class
328
399
  test_files: []
400
+ has_rdoc: true
@@ -1,11 +0,0 @@
1
- CADataFrame
2
-
3
- #append(name) { INSTANCE_CONTEXT } <- any carray
4
- #lead(name) { INSTANCE_CONTEXT } <- any carray
5
- #execute { INSTANCE_CONTEXT } => any object
6
- #select(name...) { INSTANCE_CONTEXT } <- boolean carray
7
- #reorder { INSTANCE_CONTEXT } <- int32 carray (addresses for mapping)
8
- #order_by { INSTANCE_CONTEXT } <- Array of int32 carray or carray (addresses for mapping)
9
-
10
- #calculate {|label, column| CALLER_CONTEXT } <- scalar
11
- #resample {|label, column| CALLER_CONTEXT } <- any carray
@@ -1,3 +0,0 @@
1
- require "mkmf"
2
-
3
- create_makefile("dataframe")
@@ -1,14 +0,0 @@
1
-
2
- module CA::TableMethods
3
- autoload_method "to_dataframe", "carray/dataframe/dataframe"
4
- end
5
-
6
- autoload :CADataFrame, "carray/dataframe/dataframe"
7
-
8
- autoload :DataFrame, "carray/dataframe/dataframe"
9
- autoload :RSReceiver, "carray/dataframe/dataframe"
10
-
11
- class RSRuby
12
- autoload_method "setup", "carray/dataframe/dataframe"
13
- autoload_method "recieve", "carray/dataframe/dataframe"
14
- end
@@ -1,1104 +0,0 @@
1
- require "carray"
2
- require "carray/io/table"
3
-
4
- module CA::TableMethods
5
-
6
- def to_dataframe (&block)
7
- return CADataFrame.new(self, &block)
8
- end
9
-
10
- end
11
-
12
- class CADataFrame
13
-
14
- def initialize (columns_or_table, row_index = nil, column_names = nil, &block)
15
- case columns_or_table
16
- when Hash
17
- columns = columns_or_table
18
- @column_names = columns.keys.map(&:to_s)
19
- @columns = normalize_columns(columns)
20
- @column_number = @column_names.size
21
- @row_number = @columns.first[1].size
22
- if @column_names.any?{ |key| @columns[key].size != @row_number }
23
- raise "column sizes mismatch"
24
- end
25
- when CArray
26
- table = columns_or_table
27
- if column_names
28
- @column_names = column_names.map(&:to_s)
29
- else
30
- @column_names = table.column_names.map(&:to_s)
31
- end
32
- @columns = table_to_columns(table)
33
- @column_number = @column_names.size
34
- @row_number = table.dim0
35
- else
36
- raise "unknown data"
37
- end
38
- if row_index
39
- @row_index = row_index.to_ca.object
40
- else
41
- @row_index = nil
42
- end
43
- @__methods__ = {}
44
- if block_given?
45
- arrange(&block)
46
- end
47
- end
48
-
49
- def __methods__
50
- return @__methods__
51
- end
52
-
53
- private
54
-
55
- def table_to_columns (table)
56
- new_columns = {}
57
- @column_names.each_with_index do |name, i|
58
- new_columns[name] = table[nil,i]
59
- end
60
- return new_columns
61
- end
62
-
63
- def normalize_columns (columns)
64
- new_columns = {}
65
- columns.each_key do |key|
66
- case columns[key]
67
- when CArray
68
- column = columns[key]
69
- when Array
70
- column = columns[key].to_ca
71
- if column.rank != 1
72
- list = columns[key].clone
73
- column = CArray.object(list.size).convert { list.shift }
74
- end
75
- else
76
- column = columns[key].to_ca
77
- end
78
- new_columns[key.to_s] = column
79
- end
80
- return new_columns
81
- end
82
-
83
- public
84
-
85
- attr_reader :columns, :column_names, :row_index, :column_number, :row_number
86
-
87
- def column_types
88
- return @columns_names.map{|name| @columns[name].data_type_name }
89
- end
90
-
91
- def each_column (&block)
92
- return @columns.each(&block)
93
- end
94
-
95
- def each_row (with_row_index: false, &block)
96
- if with_row_index and @row_index
97
- @row_number.times do |i|
98
- yield [@row_index[i]] + @columns.map{|n,c| c[i] }
99
- end
100
- else
101
- @row_number.times do |i|
102
- yield @columns.map{|n,c| c[i] }
103
- end
104
- end
105
- return self
106
- end
107
-
108
- def method (hash)
109
- new_hash = {}
110
- hash.each do |key, value|
111
- new_hash[key.to_s] = value.to_s
112
- end
113
- @__methods__.update(new_hash)
114
- end
115
-
116
- def col (name_or_index)
117
- case name_or_index
118
- when Integer
119
- return @columns[@column_names[name_or_index]]
120
- when String, Symbol
121
- return @columns[name_or_index.to_s]
122
- end
123
- end
124
-
125
- def template (*args, &block)
126
- return @columns.first[1].template(*args, &block)
127
- end
128
-
129
- def row (idx)
130
- if @row_index
131
- addr = @row_index.search(idx)
132
- return @column_names.map{|name| @columns[name][addr]}.to_ca
133
- else
134
- return @column_names.map{|name| @columns[name][idx]}.to_ca
135
- end
136
- end
137
-
138
- def [] (row, col = nil)
139
- if row.is_a?(Integer)
140
- row = [row]
141
- end
142
- if col
143
- if col.is_a?(Integer)
144
- col = [col]
145
- end
146
- keys = @column_names.to_ca[col].to_a
147
- values = @columns.values_at(*keys)
148
- new_columns = {}
149
- keys.each do |key|
150
- new_columns[key] = @columns[key][row]
151
- end
152
- return CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
153
- else
154
- new_columns = {}
155
- @column_names.each do |key|
156
- new_columns[key] = @columns[key][row]
157
- end
158
- return CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
159
- end
160
- end
161
-
162
- def fill (*names, value)
163
- names.each do |name|
164
- @columns[name.to_s].fill(value)
165
- end
166
- return self
167
- end
168
-
169
- def arrange (&block)
170
- return Arranger.new(self).arrange(&block)
171
- end
172
-
173
- def rename (name1, name2)
174
- if idx = @column_names.index(name1.to_s)
175
- @column_names[idx] = name2.to_s
176
- column = @columns[name1.to_s]
177
- @columns.delete(name1.to_s)
178
- @columns[name2.to_s] = column
179
- else
180
- raise "unknown column name #{name1}"
181
- end
182
- end
183
-
184
- def downcase
185
- new_column_names = []
186
- new_columns = {}
187
- @column_names.each do |name|
188
- new_column_names << name.downcase
189
- new_columns[name.downcase] = @columns[name]
190
- end
191
- @column_names = new_column_names
192
- @columns = new_columns
193
- return self
194
- end
195
-
196
- def append (name, new_column = nil, &block)
197
- if new_column
198
- # do nothing
199
- elsif block
200
- new_column = instance_exec(&block)
201
- else
202
- new_column = @columns.first[1].template(:object)
203
- end
204
- unless new_column.is_a?(CArray)
205
- new_column = new_column.to_ca
206
- end
207
- new_columns = {}
208
- @column_names.each do |key|
209
- new_columns[key] = @columns[key]
210
- end
211
- new_columns[name.to_s] = new_column
212
- return CADataFrame.new(new_columns, @row_index)
213
- end
214
-
215
- def lead (name, new_column = nil, &block)
216
- if new_column
217
- # do nothing
218
- elsif block
219
- new_column = instance_exec(&block)
220
- else
221
- new_column = @columns.first[1].template(:object)
222
- end
223
- unless new_column.is_a?(CArray)
224
- new_column = new_column.to_ca
225
- end
226
- new_columns = {}
227
- new_columns[name.to_s] = new_column
228
- @column_names.each do |key|
229
- new_columns[key] = @columns[key]
230
- end
231
- return CADataFrame.new(new_columns, @row_index)
232
- end
233
-
234
- def vacant_copy
235
- new_columns = {}
236
- @column_names.each do |key|
237
- new_columns[key] = CArray.object(0)
238
- end
239
- return CADataFrame.new(new_columns)
240
- end
241
-
242
- def merge (*args)
243
- return CADataFrame.merge(self, *args)
244
- end
245
-
246
-
247
- def execute (&block)
248
- return instance_exec(&block)
249
- end
250
-
251
- def calculate (label, &block)
252
- hash = {}
253
- @column_names.each do |name|
254
- begin
255
- if block
256
- hash[name] = [yield(name, @columns[name])]
257
- else
258
- hash[name] = [@columns[name].send(label.intern)]
259
- end
260
- rescue
261
- hash[name] = [UNDEF]
262
- end
263
- end
264
- return CADataFrame.new(hash, [label])
265
- end
266
-
267
- def resample (&block)
268
- new_columns = {}
269
- @column_names.each do |name|
270
- begin
271
- new_columns[name] = yield(name, @columns[name])
272
- rescue
273
- end
274
- end
275
- return CADataFrame.new(new_columns)
276
- end
277
-
278
- def select (*names, &block)
279
- if names.empty?
280
- names = @column_names
281
- end
282
- if block
283
- row = instance_exec(&block)
284
- else
285
- row = nil
286
- end
287
- new_columns = {}
288
- names.map(&:to_s).each do |name|
289
- new_columns[name] = @columns[name][row]
290
- end
291
- return CADataFrame.new(new_columns, @row_index ? @row_index[row] : nil)
292
- end
293
-
294
- def eliminate (*names)
295
- if names.empty?
296
- return self
297
- end
298
- names = names.map(&:to_s)
299
- new_columns = {}
300
- @column_names.each do |name|
301
- unless names.include?(name)
302
- new_columns[name] = @columns[name]
303
- end
304
- end
305
- return CADataFrame.new(new_columns, @row_index)
306
- end
307
-
308
- def matchup (keyname, reference)
309
- key = @columns[keyname.to_s]
310
- idx = reference.matchup(key)
311
- new_columns = {}
312
- @column_names.each do |name|
313
- if name == keyname
314
- new_columns[name] = reference
315
- else
316
- new_columns[name] = @columns[name].project(idx)
317
- end
318
- end
319
- if @row_index
320
- new_row_index = @row_index.project(idx).unmask(nil)
321
- else
322
- new_row_index = nil
323
- end
324
- return CADataFrame.new(new_columns, new_row_index) {
325
- self.send(keyname)[] = reference
326
- }
327
- end
328
-
329
- def reorder (&block)
330
- index = instance_exec(&block)
331
- new_columns = {}
332
- @column_names.each do |name|
333
- new_columns[name] = @columns[name][index]
334
- end
335
- return CADataFrame.new(new_columns, @row_index ? @row_index[index] : nil)
336
- end
337
-
338
- def order_by (*names, &block)
339
- if names.empty?
340
- if block
341
- ret = instance_exec(&block)
342
- case ret
343
- when CArray
344
- list = [ret]
345
- when Array
346
- list = ret
347
- end
348
- end
349
- else
350
- list = @columns.values_at(*names.map{|s| s.to_s})
351
- end
352
- return reorder { CA.sort_addr(*list) }
353
- end
354
-
355
- def reverse
356
- new_columns = {}
357
- @column_names.each do |name|
358
- new_columns[name] = @columns[name].reverse
359
- end
360
- return CADataFrame.new(new_columns, @row_index ? @row_index.reverse : nil)
361
- end
362
-
363
- def transpose (header = nil)
364
- if header
365
- column_names = header.map(&:to_s)
366
- else
367
- if @row_index
368
- column_names = @row_index.convert(:object) {|v| v.to_s }
369
- else
370
- column_names = CArray.object(@row_number).seq("a",:succ)
371
- end
372
- end
373
- return CADataFrame.new(ca.transpose, @column_names.to_ca, column_names)
374
- end
375
-
376
- def histogram (name, scale = nil, options = nil)
377
- if scale.nil?
378
- return group_by(name).table{ { :count => col(name).count_valid } }
379
- else
380
- if options
381
- hist = CAHistogram.int(scale, options)
382
- else
383
- hist = CAHistogram.int(scale)
384
- end
385
- hist.increment(@columns[name.to_s])
386
- hash = {
387
- name.to_s => hist.midpoints[0],
388
- "#{name}_L".to_s => scale[0..-2],
389
- "#{name}_R".to_s => scale.shift(-1)[0..-2],
390
- :count => hist[0..-2].to_ca,
391
- }
392
- return CADataFrame.new(hash)
393
- end
394
- end
395
-
396
- def classify (name, scale = nil, opt = {})
397
- if not scale
398
- column = @columns[name.to_s]
399
- mids = column.uniq
400
- mapper = {}
401
- mids.each_with_index do |v,i|
402
- mapper[v] = i
403
- end
404
- cls = columns.convert(:int32) {|v| mapper[v] }
405
- hash = {
406
- "#{name}_M" => mids,
407
- "#{name}_L" => mids,
408
- "#{name}_R" => mids,
409
- "#{name}_CLASS" => cls
410
- }
411
- else
412
- option = {
413
- :include_upper => false,
414
- :include_lowest => true,
415
- :offset => 0,
416
- }.update(opt)
417
- column = @columns[name.to_s]
418
- cls = scale.bin(column,
419
- option[:include_upper],
420
- option[:include_lowest],
421
- option[:offset])
422
- mids = ((scale + scale.shifted(-1))/2)[0..-2].to_ca
423
- left = scale[0..-2]
424
- right = scale.shift(-1)[0..-2]
425
- hash = {
426
- "#{name}_M" => mids.project(cls).to_ca,
427
- "#{name}_L" => left.project(cls).to_ca,
428
- "#{name}_R" => right.project(cls).to_ca,
429
- "#{name}_CLASS" => cls
430
- }
431
- end
432
- return CADataFrame.new(hash)
433
- end
434
-
435
- def suffix (suf)
436
- new_columns = {}
437
- @column_names.each do |name|
438
- new_name = (name.to_s + suf).to_s
439
- new_columns[new_name] = @columns[name]
440
- end
441
- return CADataFrame.new(new_columns, @row_index)
442
- end
443
-
444
- def ca (*names)
445
- if names.empty?
446
- return CADFArray.new(@column_names, @columns)
447
- else
448
- return CADFArray.new(names.map(&:to_s), @columns)
449
- end
450
- end
451
-
452
- def to_ca (*names)
453
- return ca(*names).to_ca
454
- end
455
-
456
- def to_hash (name1, name2)
457
- return CArray.join([@columns[name1.to_s], @columns[name2.to_s]]).to_a.to_h
458
- end
459
-
460
- def ascii_table (rowmax = :full)
461
- if @row_index
462
- namelist = [""] + @column_names
463
- tbl = CADFArray.new(namelist, @columns.clone.update("" => @row_index))
464
- else
465
- namelist = @column_names
466
- tbl = to_ca
467
- end
468
- if rowmax.is_a?(Integer) and @row_number > rowmax
469
- list = tbl[0..(rowmax/2),nil].to_a
470
- list.push namelist.map { "..." }
471
- list.push *(tbl[-rowmax/2+1..-1,nil].to_a)
472
- tbl = list.to_ca
473
- end
474
- datastr = tbl.convert {|c| __obj_to_string__(c) }.unmask("")
475
- datamb = datastr.convert(:boolean, &:"ascii_only?").not.sum(0).ne(0)
476
- namemb = namelist.to_ca.convert(:boolean) {|c| c.to_s.ascii_only? }.eq(0)
477
- mb = datamb.or(namemb)
478
- namelen = namelist.map(&:length).to_ca
479
- datalen = datastr.convert(&:length)
480
- if mb.max == 0
481
- if datalen.size == 0
482
- lengths = namelen.to_a
483
- else
484
- lengths = datalen.max(0).pmax(namelen).to_a
485
- end
486
- hrule = "-" + lengths.map {|len| "-"*len}.join("--") + "-"
487
- header = " " +
488
- [namelist, lengths].transpose.map{|name, len|
489
- "#{name.to_s.ljust(len)}" }.join(" ") + " "
490
- ary = [hrule, header, hrule]
491
- if datalen.size > 0
492
- datastr[:i,nil].each_with_index do |blk, i|
493
- list = blk.flatten.to_a
494
- ary << " " + [list, lengths].transpose.map{|value, len|
495
- "#{value.ljust(len)}"}.join(" ") + " "
496
- end
497
- end
498
- ary << hrule
499
- return "DataFrame: rows#=#{@row_number}: \n" + ary.join("\n")
500
- else
501
- namewidth = namelist.to_ca.convert{|c| __strwidth__(c.to_s) }
502
- if datalen.size == 0
503
- maxwidth = namewidth
504
- else
505
- datawidth = datastr.convert{|c| __strwidth__(c.to_s) }
506
- maxwidth = datawidth.max(0).pmax(namewidth)
507
- end
508
- len = maxwidth[:*,nil] - datawidth + datalen
509
- hrule = "-" + maxwidth.map {|len| "-"*len}.join("--") + "-"
510
- header = " " +
511
- [namelist, maxwidth.to_a].transpose.map{|name, len|
512
- "#{name.to_s.ljust(len-__strwidth__(name.to_s)+name.to_s.length)}" }.join(" ") + " "
513
- ary = [hrule, header, hrule]
514
- if datalen.size > 0
515
- datastr[:i,nil].each_with_addr do |blk, i|
516
- list = blk.flatten.to_a
517
- ary << " " + list.map.with_index {|value, j|
518
- "#{value.ljust(len[i,j])}"}.join(" ") + " "
519
- end
520
- end
521
- ary << hrule
522
- return "DataFrame: row#=#{@row_number}: \n" + ary.join("\n")
523
- end
524
- end
525
-
526
- def __obj_to_string__ (obj)
527
- case obj
528
- when Float
529
- "%.6g" % obj
530
- else
531
- obj.to_s
532
- end
533
- end
534
-
535
- def __strwidth__ (string)
536
- if string.ascii_only?
537
- return string.length
538
- else
539
- return string.each_char.inject(0){|s,c| s += c.bytesize > 1 ? 2 : 1 }
540
- end
541
- end
542
-
543
- def inspect
544
- return ascii_table(10)
545
- end
546
-
547
- def to_s
548
- return ascii_table
549
- end
550
-
551
- def to_ary
552
- return [to_s]
553
- end
554
-
555
- def to_csv (with_row_index: true)
556
- if @row_index and with_row_index
557
- namelist = [""] + @column_names
558
- tbl = CADFArray.new(namelist, @columns.clone.update("" => @row_index))
559
- else
560
- namelist = @column_names
561
- tbl = ca
562
- end
563
- output = []
564
- output << namelist.map(&:to_s).join(",")
565
- output << tbl.to_csv
566
- return output.join("\n")
567
- end
568
-
569
- def to_xlsx (filename, sheet_name: 'Sheet1', with_row_index: true, &block)
570
- require "axlsx"
571
- xl = Axlsx::Package.new
572
- xl.use_shared_strings = true
573
- sheet = xl.workbook.add_worksheet(name: sheet_name)
574
- sheet.add_row(column_names)
575
- each_row(with_row_index: with_row_index) do |list|
576
- sheet.add_row(list)
577
- end
578
- if block_given?
579
- yield sheet
580
- end
581
- xl.serialize(filename)
582
- end
583
-
584
- def method_missing (name, *args)
585
- if args.size == 0
586
- name = name.to_s
587
- if @column_names.include?(name)
588
- return @columns[name]
589
- elsif @column_names.include?(name.gsub(/_/,'.')) ### For R
590
- return @columns[name.gsub(/_/,'.')]
591
- elsif @__methods__.include?(name)
592
- return @columns[@__methods__[name]]
593
- end
594
- end
595
- super
596
- end
597
-
598
- end
599
-
600
- #############################################################
601
- #
602
- # ARRANGER
603
- #
604
- #############################################################
605
-
606
-
607
- class CADataFrame
608
-
609
- class Arranger
610
-
611
- def initialize (dataframe)
612
- @dataframe = dataframe
613
- end
614
-
615
- def arrange (&block)
616
- instance_exec(&block)
617
- return @dataframe
618
- end
619
-
620
- private
621
-
622
- def column_names
623
- return @dataframe.column_names
624
- end
625
-
626
- def row_number
627
- return @dataframe.row_number
628
- end
629
-
630
- def method (hash)
631
- @dataframe.method(hash)
632
- end
633
-
634
- def timeseries (name, fmt = "%Y-%m-%d %H:%M:%S")
635
- @dataframe.columns[name.to_s] = @dataframe.columns[name.to_s].strptime(fmt)
636
- end
637
-
638
- def type (type, name, mask = :novalue)
639
- @dataframe.columns[name.to_s] = @dataframe.columns[name.to_s].to_type(type)
640
- if mask != :novalue
641
- @dataframe.columns[name.to_s].maskout!(options[:maskout])
642
- end
643
- end
644
-
645
- def eliminate (*names)
646
- if names.empty?
647
- return self
648
- end
649
- names = names.map(&:to_s)
650
- @dataframe.column_names.each do |name|
651
- if names.include?(name)
652
- @dataframe.columns.delete(name)
653
- @dataframe.column_names.delete(name)
654
- end
655
- end
656
- end
657
-
658
- def template (*args, &block)
659
- return @dataframe.template(*args, &block)
660
- end
661
-
662
- def double (*names)
663
- names.flatten.each do |name|
664
- type(:double, name)
665
- end
666
- end
667
-
668
- def int (*names)
669
- names.flatten.each do |name|
670
- type(:int, name)
671
- end
672
- end
673
-
674
- def maskout (value, *names)
675
- names.flatten.each do |name|
676
- @dataframe.columns[name.to_s].maskout!(value)
677
- end
678
- end
679
-
680
- def unmask (value, *names)
681
- names.flatten.each do |name|
682
- @dataframe.columns[name.to_s].unmask(value)
683
- end
684
- end
685
-
686
- def col (name)
687
- return @dataframe.col(name)
688
- end
689
-
690
- def append (name, new_column)
691
- if new_column
692
- # do nothing
693
- else
694
- new_column = @dataframe.columns.first[1].template(:object)
695
- end
696
- unless new_column.is_a?(CArray)
697
- new_column = new_column.to_ca
698
- end
699
- @dataframe.columns[name.to_s] = new_column
700
- @dataframe.column_names.push(name.to_s)
701
- end
702
-
703
- def lead (name, new_column)
704
- if new_column
705
- # do nothing
706
- else
707
- new_column = @dataframe.columns.first[1].template(:object)
708
- end
709
- unless new_column.is_a?(CArray)
710
- new_column = new_column.to_ca
711
- end
712
- @dataframe.columns[name.to_s] = new_column
713
- @dataframe.column_names.unshift(name.to_s)
714
- end
715
-
716
- def rename (name1, name2)
717
- if idx = @dataframe.column_names.index(name1.to_s)
718
- @dataframe.column_names[idx] = name2.to_s
719
- column = @dataframe.columns[name1.to_s]
720
- @dataframe.columns.delete(name1.to_s)
721
- @dataframe.columns[name2.to_s] = column
722
- else
723
- raise "unknown column name #{name1}"
724
- end
725
- end
726
-
727
- def downcase
728
- @dataframe.downcase
729
- end
730
-
731
- def classify (name, scale, opt = {})
732
- return @dataframe.classify(name, scale, opt)
733
- end
734
-
735
- def map (mapper, name_or_column)
736
- case name_or_column
737
- when String, Symbol
738
- name = name_or_column
739
- column = @dataframe.columns[name.to_s]
740
- when CArray
741
- column = name_or_column
742
- when Array
743
- column = name_or_column.to_ca
744
- else
745
- raise "invalid argument"
746
- end
747
- case mapper
748
- when Hash
749
- return column.convert(:object) {|v| hash[v] }
750
- when CArray
751
- return mapper.project(column)
752
- when Array
753
- return mapper.to_ca.project(column)
754
- end
755
- end
756
-
757
- def method_missing (name, *args)
758
- if args.size == 0
759
- if @dataframe.column_names.include?(name.to_s)
760
- return @dataframe.columns[name.to_s]
761
- elsif @dataframe.__methods__.include?(name.to_s)
762
- return @dataframe.columns[@dataframe.__methods__[name.to_s]]
763
- end
764
- end
765
- super
766
- end
767
-
768
- end
769
-
770
- end
771
-
772
- #############################################################
773
- #
774
- # Class methods
775
- #
776
- #############################################################
777
-
778
- class CADataFrame
779
-
780
- def self.load_sqlite3 (*args)
781
- return CArray.load_sqlite3(*args).to_dataframe.arrange{ maskout nil, *column_names }
782
- end
783
-
784
- def to_sqlite3 (*args)
785
- ca = self.ca.to_ca
786
- ca.extend CA::TableMethods
787
- ca.column_names = column_names
788
- ca.to_sqlite3(*args)
789
- end
790
-
791
- def self.load_csv (*args, &block)
792
- return CArray.load_csv(*args, &block).to_dataframe.arrange{ maskout nil, *column_names }
793
- end
794
-
795
- def self.from_csv (*args, &block)
796
- return CArray.from_csv(*args, &block).to_dataframe.arrange{ maskout nil, *column_names }
797
- end
798
-
799
- def self.merge (*args)
800
- ref = args.first
801
- new_columns = {}
802
- args.each do |table|
803
- table.column_names.each do |name|
804
- new_columns[name] = table.col(name)
805
- end
806
- end
807
- return CADataFrame.new(new_columns, ref.row_index)
808
- end
809
-
810
- def self.concat (*args)
811
- ref = args.first
812
- column_names = ref.column_names
813
- new_columns = {}
814
- column_names.each do |name|
815
- list = args.map{|t| t.col(name) }
816
- data_type = list.first.data_type
817
- new_columns[name] = CArray.bind(data_type, list, 0)
818
- end
819
- if args.map(&:row_index).all?
820
- new_row_index = CArray.join(*args.map(&:row_index))
821
- else
822
- new_row_index = nil
823
- end
824
- return CADataFrame.new(new_columns, new_row_index)
825
- end
826
-
827
-
828
- end
829
-
830
- #############################################################
831
- #
832
- # CADFArray
833
- #
834
- #############################################################
835
-
836
class CADFArray < CAObject # :nodoc:

  attr_reader :column_names

  # column_names:: ordered list of column names
  # columns::      container indexed by column name; each entry must
  #                respond to +size+, +[]+, +value+, +has_mask?+ and +mask+
  #
  # The object is created as a read-only :object array whose shape is
  # [size of the first column, number of columns].
  def initialize (column_names, columns)
    @column_names = column_names
    @columns      = columns
    n_rows = @columns[@column_names.first].size
    n_cols = @column_names.size
    extend CA::TableMethods
    super(:object, [n_rows, n_cols], :read_only=>true)
    __create_mask__
  end

  # Returns the element addressed by +idx+, which is unpacked as
  # (row, column position); the column position is translated to a name.
  def fetch_index (idx)
    row, pos = *idx
    return column_at(pos)[row]
  end

  # Fills +data+ column by column with the +value+ of each source column.
  def copy_data (data)
    @column_names.each_with_index do |name, pos|
      data[nil,pos] = @columns[name].value
    end
  end

  # Intentionally empty.
  # NOTE(review): presumably a hook required by CAObject - confirm.
  def create_mask
  end

  # Returns the mask element addressed by +idx+ (row, column position),
  # or 0 when the addressed column carries no mask.
  def mask_fetch_index (idx)
    row, pos = *idx
    source = column_at(pos)
    return source.has_mask? ? source.mask[row] : 0
  end

  # Copies the mask of every masked source column into +data+; columns
  # without a mask are left untouched.
  def mask_copy_data (data)
    @column_names.each_with_index do |name, pos|
      source = @columns[name]
      next unless source.has_mask?
      data[nil,pos] = source.mask
    end
  end

  private

  # Looks up the source column stored under the name at position +pos+.
  def column_at (pos)
    @columns[@column_names[pos]]
  end

end
883
-
884
-
885
- #############################################################
886
- #
887
- # GROUPING
888
- #
889
- #############################################################
890
-
891
class CADataFrame

  # Groups the rows of this data frame by one or more columns.
  #
  # With exactly one name a CADataFrameGroup is returned; with any other
  # number of names a CADataFrameGroupMulti is returned.
  def group_by (*names)
    unless names.size == 1
      return CADataFrameGroupMulti.new(self, *names)
    end
    CADataFrameGroup.new(self, names.first)
  end

end
902
-
903
# Rows of a data frame grouped by the values of a single column.
class CADataFrameGroup

  # dataframe:: the data frame to group
  # name::      the grouping column name, or a one-entry Hash
  #             {name => list} whose list gives the group keys
  #             explicitly (converted with +to_ca+). Without a list the
  #             keys are the sorted unique values of the column.
  #
  # For every key the addresses of the matching rows are precomputed.
  def initialize (dataframe, name)
    @dataframe = dataframe
    case name
    when Hash
      name, list = name.first
      @column = @dataframe.col(name)
      @keys   = list.to_ca
    else
      @column = @dataframe.col(name)
      @keys   = @column.uniq.sort
    end
    @name  = name.to_s
    @addrs = {}
    @keys.each do |k|
      @addrs[k] = @column.eq(k).where
    end
  end

  # Runs +execute+ with the given block on the sub-frame of every group
  # and assembles the resulting hashes into a new data frame: the first
  # column holds the group keys, and every key of the returned hashes
  # becomes a column with one value per group.
  def table (&block)
    hashpool = []
    @keys.each do |k|
      hashpool << @dataframe[@addrs[k]].execute(&block)
    end
    columns = {@name=>@keys}
    hashpool.each_with_index do |hash, i|
      hash.each do |key, value|
        columns[key] ||= []
        columns[key][i] = value
      end
    end
    return CADataFrame.new(columns)
  end

  # Computes one value per group for every column except the grouping
  # column and returns the result as a new data frame.
  #
  # label:: name of the method sent to each group's column slice; only
  #         used when no block is given
  # block:: when given, called with (column name, column slice) and its
  #         result is stored instead
  #
  # A cell whose computation raises is left as UNDEF (best effort).
  # Raises ArgumentError when neither a label nor a block is given.
  def calculate (label = nil, &block)
    if label.nil? && block.nil?
      raise ArgumentError, "calculate requires a method label or a block"
    end
    new_columns = {@name=>@keys}
    @dataframe.each_column do |name, column|
      next if name == @name
      new_columns[name] = CArray.object(@keys.size) { UNDEF }
      @keys.each_with_index do |k, i|
        begin
          if block
            new_columns[name][i] = yield(name, column[@addrs[k]])
          else
            new_columns[name][i] = column[@addrs[k]].send(label.intern)
          end
        rescue
          # deliberate: a failing cell keeps its UNDEF placeholder
        end
      end
    end
    return CADataFrame.new(new_columns)
  end

  # Returns the sub-frame of the group with the given key, or a vacant
  # copy of the data frame when the key is unknown.
  def [] (group_value)
    if map = @addrs[group_value]
      return @dataframe[map]
    else
      return @dataframe.vacant_copy
    end
  end

  # Yields (key, sub-frame) for every group in key order, mirroring
  # CADataFrameGroupMulti#each. Returns an Enumerator without a block.
  def each
    return enum_for(:each) unless block_given?
    @keys.each do |k|
      yield k, @dataframe[@addrs[k]]
    end
  end

end
969
-
970
# Rows of a data frame grouped by the value combinations of several
# columns.
class CADataFrameGroupMulti

  # dataframe:: the data frame to group
  # names::     one entry per grouping column; each is a column name or
  #             a one-entry Hash {name => list} giving that column's
  #             keys explicitly (converted with +to_ca+). Without a list
  #             the keys are the sorted unique values of the column.
  #
  # For every combination of keys (see #each_with_keys) the addresses of
  # the rows matching all of them are precomputed.
  #
  # Raises ArgumentError when no grouping column is given (previously
  # this failed with a NoMethodError on nil).
  def initialize (dataframe, *names)
    if names.empty?
      raise ArgumentError, "at least one grouping column is required"
    end
    @rank = names.size
    @dataframe = dataframe
    @names  = []
    @column = []
    @keys   = []
    names.each_with_index do |name, i|
      case name
      when Hash
        name, list = name.first
        @column[i] = @dataframe.col(name)
        @keys[i]   = list.to_ca
      else
        @column[i] = @dataframe.col(name)
        @keys[i]   = @column[i].to_ca.uniq.sort
      end
      @names[i] = name
    end
    @addrs = {}
    each_with_keys do |list|
      flag = @column[0].eq(list[0])
      (1...@rank).each do |i|
        flag &= @column[i].eq(list[i])
      end
      @addrs[list] = flag.where
    end
  end

  # Iterates over the cartesian product of the key lists, passing each
  # combination as an Array. Without a block the underlying Array#each
  # returns an Enumerator.
  def each_with_keys (&block)
    @keys[0].to_a.product(*@keys[1..-1].map(&:to_a)).each(&block)
  end

  # Runs +execute+ with the given block on the sub-frame of every key
  # combination and assembles a new data frame: one column per grouping
  # name holding the key values, plus one column per key of the hashes
  # returned by +execute+.
  def table (&block)
    # build the key combinations once and reuse them below
    key_lists = each_with_keys.to_a
    hashpool = key_lists.map do |list|
      @dataframe[@addrs[list]].execute(&block)
    end
    columns = {}
    @names.each_with_index do |name, i|
      columns[name] = key_lists.map { |list| list[i] }
    end
    hashpool.each_with_index do |hash, i|
      hash.each do |key, value|
        columns[key] ||= []
        columns[key][i] = value
      end
    end
    return CADataFrame.new(columns)
  end

  # Returns the sub-frame for the given key combination (an Array with
  # one value per grouping column), or a vacant copy of the data frame
  # when the combination is unknown.
  def [] (group_value)
    if map = @addrs[group_value]
      return @dataframe[map]
    else
      return @dataframe.vacant_copy
    end
  end

  # Yields (key combination, sub-frame) for every combination.
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?
    each_with_keys do |key|
      yield key, @dataframe[@addrs[key]]
    end
  end

end
1042
-
1043
-
1044
- #############################################################
1045
- #
1046
- # PIVOT TABLE
1047
- #
1048
- #############################################################
1049
-
1050
class CADataFrame

  # Creates a CADataFramePivot over this data frame for the two given
  # column specifications (each a column name or a {name => keys} Hash,
  # as accepted by CADataFramePivot#initialize).
  def pivot (name1, name2)
    CADataFramePivot.new(self, name1, name2)
  end

end
1057
-
1058
# Cross-tabulation of a data frame over the values of two columns.
class CADataFramePivot

  # dataframe::    the data frame to pivot
  # name1, name2:: a column name, or a one-entry Hash {name => list}
  #                that supplies the keys explicitly. Without a list the
  #                keys are the sorted unique values of the column.
  #
  # An explicit list for the first column is converted with +to_ca+; an
  # explicit list for the second column is stored as given.
  # For every (key1, key2) pair the addresses of the rows matching both
  # keys are precomputed.
  def initialize (dataframe, name1, name2)
    @dataframe = dataframe

    explicit1 = Hash === name1
    name1, list1 = name1.first if explicit1
    @column1 = @dataframe.col(name1)
    @keys1   = explicit1 ? list1.to_ca : @column1.uniq.sort

    explicit2 = Hash === name2
    name2, list2 = name2.first if explicit2
    @column2 = @dataframe.col(name2)
    @keys2   = explicit2 ? list2 : @column2.uniq.sort

    @addrs = {}
    @keys1.each do |k1|
      @keys2.each do |k2|
        matches = @column1.eq(k1) & @column2.eq(k2)
        @addrs[[k1,k2]] = matches.where
      end
    end
  end

  # Builds the pivot table: one column per key of the second column, one
  # row per key of the first column. Each cell is the result of running
  # +execute+ with the given block on the sub-frame of rows matching
  # both keys. Cells start out as UNDEF, and the first column's keys are
  # passed to the new data frame as its row index.
  def table (&block)
    columns = {}
    @keys2.each do |k2|
      columns[k2] = CArray.object(@keys1.size) { UNDEF }
    end
    @keys1.each_with_index do |k1, row|
      @keys2.each do |k2|
        subset = @dataframe[@addrs[[k1,k2]]]
        columns[k2][row] = subset.execute(&block)
      end
    end
    CADataFrame.new(columns, @keys1)
  end

end
1102
-
1103
-
1104
-